def variables(args):
    if args.get:
        try:
            var = Variable.get(args.get,
                               deserialize_json=args.json,
                               default_var=args.default)
            print(var)
        except ValueError as e:
            print(e)
    if args.set:
        Variable.set(args.set[0], args.set[1])
    if not args.set and not args.get:
        # list all variables
        session = settings.Session()
        vars = session.query(Variable)
        msg = "\n".join(var.key for var in vars)
        print(msg)
def set_sms(*args, **context):
    group = Variable.get('group')
    if group == 'night_shift':
        context['task_instance'].xcom_push('recipient', '0011223344')
        context['task_instance'].xcom_push('message', 'night airflow message')
    else:
        context['task_instance'].xcom_push('recipient', '0011223344')
        context['task_instance'].xcom_push('message', 'day airflow message')
def variables(args):
    if args.get:
        try:
            var = Variable.get(args.get,
                               deserialize_json=args.json,
                               default_var=args.default)
            print(var)
        except ValueError as e:
            print(e)
    if args.delete:
        session = settings.Session()
        session.query(Variable).filter_by(key=args.delete).delete()
        session.commit()
        session.close()
    if args.set:
        Variable.set(args.set[0], args.set[1])
    # Work around 'import' as a reserved keyword
    imp = getattr(args, 'import')
    if imp:
        if os.path.exists(imp):
            import_helper(imp)
        else:
            print("Missing variables file.")
    if args.export:
        export_helper(args.export)
    if not (args.set or args.get or imp or args.export or args.delete):
        # list all variables
        session = settings.Session()
        vars = session.query(Variable)
        msg = "\n".join(var.key for var in vars)
        print(msg)
def failed(self, context):
    self.conf = context["conf"]
    self.task = context["task"]
    self.execution_date = context["execution_date"]
    self.dag = context["dag"]
    self.errors = SlackAPIPostOperator(
        task_id='task_failed',
        token=Variable.get('slack_token'),
        channel='C1SRU2R33',
        text="Your DAG has encountered an error, please follow the link "
             "to view the log details: "
             "http://localhost:8080/admin/airflow/log?"
             "task_id=" + self.task.task_id + "&"
             "execution_date=" + self.execution_date.isoformat() + "&"
             "dag_id=" + self.dag.dag_id,
        dag=self.dag,
    )
    self.errors.execute()
def wrapped(context):
    """ping error in slack on failure and provide link to the log"""
    conf = context["conf"]
    task = context["task"]
    execution_date = context["execution_date"]
    dag = context["dag"]
    base_url = conf.get('webserver', 'base_url')

    # Get the ID of the target slack channel
    slack_token = Variable.get(slack_token_variable)
    sc = SlackClient(slack_token)
    response = sc.api_call('channels.list')
    for channel in response['channels']:
        if channel['name'].lower() == channel_name.lower():
            break
    else:
        raise AirflowException('No channel named {} found.'.format(channel_name))

    # Construct a slack operator to send the message off.
    notifier = cls(
        task_id='task_failed',
        token=slack_token,
        channel=channel['id'],
        text=(
            "Your DAG has encountered an error, please follow the link "
            "to view the log details: "
            "{}/admin/airflow/log?"
            "task_id={}&"
            "dag_id={}&"
            "execution_date={}"
        ).format(base_url, task.task_id, dag.dag_id,
                 execution_date.isoformat()),
        dag=dag,
    )
    notifier.execute()
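A callback of this shape is normally wired up through on_failure_callback in a DAG's default_args (as one of the later snippets does with task_fail_slack_alert). A minimal, self-contained sketch — the DAG id and the print-based callback are illustrative only, standing in for the Slack notifier built above:

from datetime import datetime

from airflow import DAG
from airflow.operators.bash_operator import BashOperator


def notify_failure(context):
    # In the snippet above this would build and execute a Slack operator;
    # here we only log, to keep the sketch self-contained.
    print("Task {} in DAG {} failed".format(
        context["task"].task_id, context["dag"].dag_id))


default_args = {
    "owner": "airflow",
    "start_date": datetime(2021, 1, 1),
    # Airflow calls this with the task context whenever a task fails.
    "on_failure_callback": notify_failure,
}

with DAG("failure_callback_example", default_args=default_args,
         schedule_interval="@daily") as dag:
    BashOperator(task_id="always_fails", bash_command="exit 1")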
try:
    # airflow.utils.timezone is available from v1.10 onwards
    from airflow.utils import timezone
    now = timezone.utcnow
except ImportError:
    now = datetime.utcnow

DAG_ID = os.path.basename(__file__).replace(".pyc", "").replace(".py", "")  # airflow-db-cleanup
START_DATE = airflow.utils.dates.days_ago(1)
# How often to Run. @daily - Once a day at Midnight (UTC)
SCHEDULE_INTERVAL = "@daily"
# Who is listed as the owner of this DAG in the Airflow Web Server
DAG_OWNER_NAME = "operations"
# List of email addresses to send email alerts to if this job fails
ALERT_EMAIL_ADDRESSES = []
# Length of time to retain the entries if not already provided in the conf.
# If this is set to 30, the job will remove entries that are 30 days old or older.
DEFAULT_MAX_DB_ENTRY_AGE_IN_DAYS = int(
    Variable.get("airflow_db_cleanup__max_db_entry_age_in_days", 30))
# Whether the job should delete the db entries or not. Included if you want to
# temporarily avoid deleting the db entries.
ENABLE_DELETE = True
# List of all the objects that will be deleted. Comment out the DB objects you want to skip.
DATABASE_OBJECTS = [
    {
        "airflow_db_model": DagRun,
        "age_check_column": DagRun.execution_date,
        "keep_last": True,
        "keep_last_filters": [DagRun.external_trigger == False],
        "keep_last_group_by": DagRun.dag_id
    },
    {
        "airflow_db_model": TaskInstance,
        "age_check_column": TaskInstance.execution_date,
        "keep_last": False,
        "keep_last_filters": None,
# 2. Fact tables dependent on the stage tables are also loaded
#    (Fact tables dependent upon two different stages will not be loaded).
#    This gives a little bit of flexibility over db-based task groups
#######################################################################################
from airflow import DAG
import base64
from datetime import timedelta
from airflow.utils.dates import days_ago
from airflow.models import Variable
from airflowcommon import getBatchId, getpythonoperator, getbashoperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.subdag_operator import SubDagOperator
from airflow.utils.task_group import TaskGroup

# set the default config for the dag
dset = Variable.get("factloadjob1", deserialize_json=True)
kinitparms = Variable.get("kinitparms", deserialize_json=True)
password = kinitparms["kinitpass"]
password = base64.b64decode(password).decode('utf-8')
#kinitprincipal = kinitparms["kinitprincipal"]
crpmdevicedict = dset["crpm_device_mapping"]
sqoopjobs = crpmdevicedict["jobs"]
factdb = crpmdevicedict["factdb"]
srctoland = crpmdevicedict["src2land"]
land2stg = crpmdevicedict["land2stg"]
scriptpaths = dset["scriptpaths"]
kinitprincipal = kinitparms["kinitprincipal"]
kinitdomain = kinitparms["kinitdomain"]
edgenodehost = kinitparms["edgenodehost"]

default_args = {
def my_function():
    from airflow.models import Variable
    catalogs_folder = Variable.get("CATALOGS_FOLDER")

    import numpy as np
    import pandas as pd
    from pplaa import Project

    prj = Project()
    prj.init(catalogs_folder + '/example_006')
    prj.cat.raw.pokemon.load()
    prj.cat.raw.pokemon.load()['HP'].max()  # Max HP

    validation_rules = {
        'raw.pokemon': {
            'rules': [
                {
                    'rtype': 'REQUIRED_COLUMNS_RULE',
                    'mandatory': 1,
                    'columns': ['Name', 'Type 1', 'Total', 'HP'],
                    'strict': 0,
                    'paused': 0
                },
                {
                    'rtype': 'MIN_MAX_RULE',
                    'mandatory': 1,
                    'column': 'HP',
                    'min_value': 0,
                    'max_value': 255  # <-- Max HP
                }
            ]
        }
    }
    prj.cat.set_validation_rules(validation_rules)
    prj.cat.validate('raw.pokemon').passed

    # The cat.validate() method returns a ValidationReport object
    type(prj.cat.validate('raw.pokemon'))
    # When we print a ValidationReport, we obtain a report of the result
    print(prj.cat.validate('raw.pokemon'))

    # Force the validation to fail by lowering the max_value for HP (MIN_MAX_RULE)
    validation_rules['raw.pokemon']['rules'][1]['max_value'] = 254
    prj.cat.set_validation_rules(validation_rules)
    prj.cat.validate('raw.pokemon').passed
    print(prj.cat.validate('raw.pokemon'))

    # REQUIRED_COLUMNS_RULE -> Ok
    vars(prj.cat.validate('raw.pokemon').validation_result.results[0]['result'])
    # MIN_MAX_RULE -> Fail
    vars(prj.cat.validate('raw.pokemon').validation_result.results[1]['result'])
    }]
    TaskTimeoutMonitor().set_task_monitor(msg)


task_timeout_monitor = PythonOperator(task_id='task_timeout_monitor',
                                      python_callable=fun_task_timeout_monitor,
                                      provide_context=True,
                                      dag=dag)

##----------------------------------------- Variables ---------------------------------------##

db_name = "opay_dw_ods"
table_name = "ods_sqoop_base_message_record_di"
hdfs_path = "oss://opay-datalake/opay_dw_sqoop_di/opay_sms/message_record"
config = eval(Variable.get("opay_time_zone_config"))


def ods_sqoop_base_message_record_di_sql_task(ds):
    HQL = '''
    set hive.exec.dynamic.partition.mode=nonstrict;
    set hive.exec.parallel=true;
    insert overwrite table {db}.{table} partition (dt)
    SELECT
        id,
        template_name,
        country_code,
        message_type,
        mobile,
        content,
default_args = {
    'owner': 'chr0nomaton',
    'depends_on_past': False,
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('Automata',
          start_date=datetime(2018, 12, 30),
          default_args=default_args,
          schedule_interval=timedelta(days=1))

user_id = Variable.get("SPOTIFY_CLIENT_USER_ID")
auth_token = Variable.get("SPOTIFY_CLIENT_TOKEN_CACHE")
with open(f"/usr/local/airflow/.cache-{user_id}", "w+") as f:
    print(f"Wrote {auth_token} to /usr/local/airflow/.cache-{user_id}")
    f.write(auth_token)

spotify = SpotifyAPI(
    user_id=user_id,
    client_id=Variable.get("SPOTIFY_CLIENT_ID"),
    client_secret=Variable.get("SPOTIFY_CLIENT_SECRET"),
    redirect_uri=Variable.get("SPOTIFY_CLIENT_REDIRECT_URI"),
)

with open(f"/usr/local/airflow/.cache-{user_id}") as f:
    Variable.set("SPOTIFY_CLIENT_TOKEN_CACHE", f.read())

t1_get_bands = PythonOperator(task_id='fetch_reddit_posts',
                              python_callable=get_reddit_posts,
default_args = {
    'owner': 'airflow',
    'description': 'Gathers MDS data from Bird',
    'depends_on_past': False,
    'start_date': datetime(2018, 1, 1),
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'on_failure_callback': task_fail_slack_alert,
}

mds_provider = "bird"
current_time = datetime.now() + timedelta(days=-1, hours=-6)
time_max = f"{current_time.year}-{current_time.month}-{current_time.day}-{(current_time.hour)}"
environment_vars = Variable.get("atd_mds_config_staging", deserialize_json=True)
docker_image = 'atddocker/atd-mds-etl:master'

with DAG(
        f"atd_mds_{mds_provider}_staging",
        default_args=default_args,
        schedule_interval="15 * * * *",
        catchup=False,
        tags=["staging", "mds"],
) as dag:
    #
    # Task: provider_extract
    # Description: Given a schedule block, the script extracts data from the MDS provider
    # within the schedule's time window, then it uploads the data into S3 for further processing.
    #
    t1 = DockerOperator(
def ReportDailySuccessful(task_instance, **kwargs):
    date = kwargs['execution_date']
    latest_run = float(Variable.get('latest_daily_timestamp'))

    timestamp = time.mktime(date.timetuple())
    logging.info('Current run\'s timestamp: %s \n'
                 'latest_daily\'s timestamp: %s', timestamp, latest_run)
    if timestamp >= latest_run:
        Variable.set('latest_daily_timestamp', timestamp)
        run_sha = task_instance.xcom_pull(task_ids='get_git_commit')
        latest_version = GetSettingPython(task_instance, 'VERSION')
        logging.info('setting latest green daily to: %s', run_sha)
        Variable.set('latest_sha', run_sha)
        Variable.set('latest_daily', latest_version)
        logging.info('latest_sha set to %s', run_sha)
def GetVariableOrDefault(var, default):
    try:
        return Variable.get(var)
    except KeyError:
        return default
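For comparison, Variable.get also accepts a default directly (as the default_var test cases further down show), so a helper like the one above can be collapsed to a single call. A minimal sketch; the variable name mirrors the 'major_version' lookup used elsewhere in these snippets:

from airflow.models import Variable

# default_var is returned when the Variable does not exist, instead of raising KeyError.
major_version = Variable.get("major_version", default_var=0)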
def set_mail(*args, **context):
    group = Variable.get('group')
    if group == 'night_shift':
        context['task_instance'].xcom_push(key='recipient', value='*****@*****.**')
    else:
        context['task_instance'].xcom_push(key='recipient', value='*****@*****.**')
from datetime import datetime, timedelta

from airflow.operators.subdag_operator import SubDagOperator
from airflow.models import DAG, Variable

from tester_collector.subdags.sub import all_process

PROJECT_VERSION = '1.0'
PROJECT_NAME = 'tester-collector'

# MAIN DAGS
# interval = "0 3 */1 * *"
interval = "*/10 * * * *"
DAG_ID = 'tester_collector'
start_date = datetime.strptime(Variable.get("tester_collector_start_date"),
                               "%Y-%m-%d %H:%M:%S")
emails = Variable.get('support_email_list').split(',')

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': start_date,
    'email': emails,
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 2,
    'retry_delay': timedelta(minutes=2)
}

with DAG(dag_id=DAG_ID,
         default_args=default_args,
         schedule_interval=interval,
         start_date=start_date) as dag:
def dividend_probability_calculator():
    credentials = service_account.Credentials.from_service_account_info(
        Variable.get("key", deserialize_json=True))
    destination_bucket_name = 'dividend_declarations_hackathon'
    storage_client = storage.Client()
    destination_bucket = storage_client.bucket(destination_bucket_name)
    project_id = 'hackathon-wpb'
    table_id = 'hackathon-wpb.customer_relations.customer_dividend_malaysia'
    query_string = """ SELECT * FROM hackathon-wpb.customer_relations.customer_dividend_malaysia"""
    table_schema = [{
        'name': 'Ticker', 'type': 'STRING', 'mode': 'REQUIRED'
    }, {
        'name': 'Mic', 'type': 'STRING', 'mode': 'REQUIRED'
    }, {
        'name': 'Contacts', 'type': 'RECORD', 'mode': 'REPEATED',
        'fields': [{'name': 'Name', 'type': 'STRING', 'mode': 'NULLABLE'},
                   {'name': 'email', 'type': 'STRING', 'mode': 'NULLABLE'}]
    }, {
        'name': 'Dividend', 'type': 'RECORD', 'mode': 'REPEATED',
        'fields': [{'name': 'DeclarationYear', 'type': 'STRING', 'mode': 'NULLABLE'},
                   {'name': 'DeclaratioMonth', 'type': 'STRING', 'mode': 'NULLABLE'},
                   {'name': 'DeclarationDate', 'type': 'STRING', 'mode': 'NULLABLE'}]
    }, {
        'name': 'RecentDeclarationDate', 'type': 'DATE', 'mode': 'NULLABLE'
    }, {
        'name': 'NextPayableDate', 'type': 'DATE', 'mode': 'NULLABLE'
    }, {
        'name': 'ExpectedStartDate', 'type': 'DATE', 'mode': 'NULLABLE'
    }, {
        'name': 'ExpectedEndDate', 'type': 'DATE', 'mode': 'NULLABLE'
    }, {
        'name': 'LastRunDate', 'type': 'DATE', 'mode': 'NULLABLE'
    }, {
        'name': 'ProbabilityNextMonthDeclaration', 'type': 'NUMERIC', 'mode': 'NULLABLE'
    }, {
        'name': 'Period', 'type': 'INTEGER', 'mode': 'NULLABLE'
    }]
    project_id = 'hackathon-wpb'
    dataset_id = 'customer_relations'
    table_id = 'customer_dividend_malaysia'
    client = bigquery.Client(project=project_id)
    dataset = client.dataset(dataset_id)
    table = dataset.table(table_id)
    job_config = bigquery.LoadJobConfig()
    job_config.source_format = bigquery.SourceFormat.NEWLINE_DELIMITED_JSON
    job_config.schema = table_schema
    job_config.write_disposition = 'WRITE_TRUNCATE'

    dataframe_complete = pdgbq.read_gbq(query=query_string, project_id=project_id)
    dataframe = dataframe_complete
    print(dataframe.dtypes)
    print(len(dataframe))

    base = datetime.today().date()
    start_date = base + timedelta(days=30)
    end_date = base + timedelta(days=70)

    df_companies = pd.DataFrame(dataframe_complete.Ticker.unique())
    df_companies.rename(columns={0: 'Ticker'}, inplace=True)
    df_companies['ProbabilityNextMonthDeclaration'] = 0.0
    df_companies['ExpectedStartDate'] = ''
    df_companies['ExpectedEndDate'] = ''
    convert_dict = {
        'Ticker': str,
        'ProbabilityNextMonthDeclaration': float,
        'ExpectedStartDate': np.datetime64,
        'ExpectedEndDate': np.datetime64
    }
    df_companies = df_companies.astype(convert_dict)

    for ind in df_companies.index:
        company_name = df_companies['Ticker'][ind]
        df_company_temp = pd.DataFrame(
            dataframe.loc[dataframe['Ticker'] == company_name])
        df_company_temp_list = df_company_temp['Dividend']
        index = df_company_temp_list.index
        try:
            df_company_temp_2 = json_normalize(df_company_temp_list[index[0]])
            df = pd.DataFrame({
                'year': df_company_temp_2['DeclarationYear'],
                'month': df_company_temp_2['DeclaratioMonth'],
                'day': df_company_temp_2['DeclarationDate']
            })
            df_company_temp_2['Date'] = pd.to_datetime(df)
            if (company_name == 'CIMB.XKLS'):
                print(df_company_temp_2)
            df_company_temp_2.drop_duplicates(subset=[
                'DeclarationYear', 'DeclaratioMonth', 'DeclarationDate'
            ], inplace=True)
            if (company_name == 'CIMB.XKLS'):
                print(df_company_temp_2)
        except:
            continue

        total_declarations = len(df_company_temp_2['DeclarationYear'].unique())
        recent_years = []
        non_recent_years = []
        count_recent = 0
        count_recent_two_years = 0
        number_of_recent_years = 0
        number_of_non_recent_years = 0
        count_non_recent = 0
        base_date_minus_2 = base - timedelta(days=730)
        base_date_minus_5 = base - timedelta(days=1825)
        year_considered = []

        for ind2 in df_company_temp_2.index:
            months = dataframe.loc[dataframe['Ticker'] == company_name, 'Period']
            if (not (math.isnan(months))):
                start_date = base + timedelta(days=int(months) * 30)
                end_date = start_date + timedelta(days=40)
                start_dates, end_dates = date_to_months(start_date, end_date)
                date_temp = df_company_temp_2['Date'][ind2]
                if (company_name == 'CIMB.XKLS'):
                    print(date_temp)
                if (base_date_minus_2 <= date_temp < base):
                    recent_years.append(date_temp.year)
                elif (base_date_minus_5 <= date_temp < base_date_minus_2):
                    recent_years.append(date_temp.year)
                else:
                    non_recent_years.append(date_temp.year)
                out_fmt = '%Y-%m-%d'
                # for every month check if previous declaration month/date falls in the range
                for start, end in zip(start_dates, end_dates):
                    year = start.year
                    if (company_name == 'CIMB.XKLS'):
                        print('base')
                        print(base)
                        print('start')
                        print(start)
                        print('end')
                        print(end)
                    try:
                        if start.replace(year=year) <= date_temp.replace(year=year) <= end.replace(year=year):
                            if (not date_temp.year in year_considered):
                                if (base_date_minus_2 <= date_temp < base):
                                    count_recent_two_years = count_recent_two_years + 1
                                    if (company_name == 'CIMB.XKLS'):
                                        print(date_temp)
                                        print("count_recent incremented")
                                elif (base_date_minus_5 <= date_temp < base_date_minus_2):
                                    count_recent = count_recent + 1
                                    if (company_name == 'CIMB.XKLS'):
                                        print(date_temp)
                                        print("count_recent incremented")
                                elif (date_temp <= base_date_minus_5):
                                    count_non_recent = count_non_recent + 1
                                    if (company_name == 'CIMB.XKLS'):
                                        print(date_temp)
                                        print("count_non_recent incremented")
                                year_considered.append(date_temp.year)
                    except:
                        # to handle 29th feb
                        #print(date_temp)
                        one_day = timedelta(1)
                        date_temp = date_temp - one_day
                        if start.replace(year=year) <= date_temp.replace(year=year) <= end.replace(year=year):
                            if (not date_temp.year in year_considered):
                                if (base_date_minus_2 <= date_temp < base):
                                    count_recent_two_years = count_recent_two_years + 1
                                    if (company_name == 'UEMS.XKLS'):
                                        print(date_temp)
                                        print("count_recent incremented")
                                elif (base_date_minus_5 <= date_temp < base_date_minus_2):
                                    count_recent = count_recent + 1
                                    if (company_name == 'UEMS.XKLS'):
                                        print(date_temp)
                                        print("count_recent incremented")
                                elif (date_temp < base_date_minus_5):
                                    count_non_recent = count_non_recent + 1
                                    #print("count_non_recent incremented")
                                year_considered.append(date_temp.year)

        number_of_latest_years = 2
        number_of_recent_years = 3  #(pd.Series(recent_years)).nunique()
        number_of_non_recent_years = (pd.Series(non_recent_years)).nunique()
        probability = (
            (3 * weird_division(count_recent_two_years, number_of_latest_years)) +
            (2 * weird_division(count_recent, number_of_recent_years)) +
            (weird_division(count_non_recent, number_of_non_recent_years))) / 6
        if (company_name == 'CIMB.XKLS'):
            print(count_recent_two_years)
            print(number_of_latest_years)
            print(count_recent)
            print(number_of_recent_years)
            print(count_non_recent)
            print(non_recent_years)
            print(number_of_non_recent_years)
            print(probability)

        df_companies['ProbabilityNextMonthDeclaration'][ind] = round(probability, 3)
        df_companies['ExpectedStartDate'][ind] = np.datetime64(start_date)
        df_companies['ExpectedEndDate'][ind] = np.datetime64(end_date)
        dataframe.loc[dataframe['Ticker'] == company_name,
                      'ProbabilityNextMonthDeclaration'] = str(probability)
        dataframe.loc[dataframe['Ticker'] == company_name,
                      'ExpectedStartDate'] = np.datetime64(start_date)
        dataframe.loc[dataframe['Ticker'] == company_name,
                      'ExpectedEndDate'] = np.datetime64(end_date)

    dataframe_complete.drop([
        'ProbabilityNextMonthDeclaration', 'ExpectedStartDate',
        'ExpectedEndDate', 'LastRunDate'
    ], axis=1, inplace=True)
    df_update = pd.merge(dataframe_complete, df_companies,
                         left_on='Ticker', right_on='Ticker')
    df_update['LastRunDate'] = np.datetime64(base)
    df_update['NextPayableDate'] = df_update['NextPayableDate'].dt.strftime('%Y-%m-%d')
    df_update['ExpectedStartDate'] = df_update['ExpectedStartDate'].dt.strftime('%Y-%m-%d')
    df_update['ExpectedEndDate'] = df_update['ExpectedEndDate'].dt.strftime('%Y-%m-%d')
    df_update['LastRunDate'] = df_update['LastRunDate'].dt.strftime('%Y-%m-%d')
    df_update['RecentDeclarationDate'] = df_update['RecentDeclarationDate'].dt.strftime('%Y-%m-%d')

    json_data = df_update.to_json(orient="records")
    json_object = json.loads(json_data)
    job = client.load_table_from_json(
        json_object, table, job_config=job_config)  # Make an API request.
    filename = 'customer_dividend_malaysia_probability_update_' + str(datetime.now()) + '.json'
    blob = destination_bucket.blob(filename)
    blob.upload_from_string(data=json.dumps(json_object),
                            content_type='application/json')
    job.result()

    count_mails = 0  # remove for actual code
    for ind3 in df_companies.index:
        company_name = df_companies['Ticker'][ind3]
        print(company_name)
        print(ind3)
        probability = df_companies['ProbabilityNextMonthDeclaration'][ind3]
        expected_start_date = df_update.loc[dataframe['Ticker'] == company_name,
                                            'ExpectedStartDate'].iloc[0]
        expected_end_date = df_update.loc[dataframe['Ticker'] == company_name,
                                          'ExpectedEndDate'].iloc[0]
        if (float(probability) > 0.9 and count_mails < 10):
            df_contacts_temp = pd.DataFrame(
                dataframe.loc[dataframe['Ticker'] == company_name])
            df_contacts_temp_list = df_company_temp['Contacts']
            index = df_contacts_temp_list.index
            df_contacts = json_normalize(df_contacts_temp_list[index[0]])
            contacts = df_contacts.drop_duplicates(subset=['email'], keep='last')

            html_string = None
            with open(
                    '/opt/bitnami/airflow/dags/git-github-com-jainita95-dividend-tracker-git/EmailTemplateUpcomingDividend.html',
                    'r') as f:
                html_string = f.read()
            html_string = html_string.format(code=company_name,
                                             startDate=expected_start_date,
                                             endDate=expected_end_date,
                                             probability=math.ceil(probability * 100))
            name = []
            emails = []
            for ind4 in contacts.index:
                name_contact = contacts['Name'][ind4]
                email = contacts['email'][ind4]
                name.append(name_contact)
                emails.append(To(email))
            message = Mail(
                from_email='*****@*****.**',
                to_emails=emails,
                subject="Notice: An Upcoming Dividend Declaration cited for " + company_name,
                html_content=html_string)

            with open(
                    '/opt/bitnami/airflow/dags/git-github-com-jainita95-dividend-tracker-git/hsbcLogo.png',
                    'rb') as f:
                data = f.read()
                f.close()
            encoded = base64.b64encode(data).decode()
            attachment = Attachment()
            attachment.file_content = FileContent(encoded)
            attachment.file_type = FileType('image/png')
            attachment.file_name = FileName('hsbcLogo.png')
            attachment.disposition = Disposition('inline')
            attachment.content_id = ContentId('hsbclogo')
            message.add_attachment(attachment)

            try:
                sg = SendGridAPIClient(Variable.get("sendgridapikey"))
                response = sg.send(message)
                count_mails = count_mails + 1
                #print(response.status_code)
                #print(response.body)
                #print(response.headers)
            except Exception as e:
                print(e.message)
import json
from datetime import datetime, timedelta

import requests

from airflow import DAG
from airflow.hooks.mysql_hook import MySqlHook
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator

# Utils
api_key = Variable.get('api_key')
cities_ = Variable.get('cities').split(',')
cities = [x.encode('utf-8') for x in cities_]


def open_weather_response_parser(response_text, city):
    response_dict = json.loads(response_text)
    main = response_dict['main']
    temp_live = round(main['temp'] - 273.15, 2)
    temp_max = round(main['temp_max'] - 273.15, 2)
    temp_min = round(main['temp_min'] - 273.15, 2)
    humidity = main['humidity']
    pressure = main['pressure']
    weather = response_dict['weather'][0]['main']
    wind_speed = response_dict['wind']['speed']
    time = datetime.utcnow()
    parsed_response = {
        'city': city,
        'temp_live': temp_live,
        'temp_max': temp_max,
def DEFAULT_SQL_DIR(cls):
    sql_dir = Path(Variable.get("sql_dir"))
    if not sql_dir.exists():
        PKG_PARENT = Path(__file__).absolute().parent.parent.parent.parent
        sql_dir = PKG_PARENT / "airflow-core/sql"
    return sql_dir / "salesforce"
import json
import os
import sys

from airflow.models import Variable

sys.path.append(Variable.get('module_path'))
# sys.path.append(os.getenv('MODULE_PATH'))

from models.dag_task_model import DagTaskModel
from models.notification_message_model import NotificationMessageModel, NotificationType
from models.notification_subject_model import NotificationSubjectModel
from models.slack_model import SlackModel
from messengers.slack_notification import SlackNotification
# import Log


class SlackNotificationService:
    def __init__(self, notification_type: str) -> None:
        self.notification_type = NotificationType(notification_type)
        self.COUNT = 0
        self.RETRY_LIMIT = 5

    def send_message(self, status: str) -> dict:
        # str.translate() needs ordinal keys, so build the translation tables
        # with str.maketrans to strip the angle brackets (and colon) from the reprs.
        dag_id = str(status['dag']).translate(str.maketrans('', '', '<:>'))
        task_id = str(status['task']).translate(str.maketrans('', '', '<>'))
        dag_task = DagTaskModel(dag_id, task_id)
        notification_message: NotificationMessageModel = NotificationMessageModel(
            dag_task, self.notification_type)
        notification_subject: NotificationSubjectModel = NotificationSubjectModel(
            dag_task)
        slack_model = SlackModel(notification_subject, notification_message)
}

DOCKER_IMAGE = "atddocker/atd-knack-services:production"

# command args
SCRIPT_TASK_1 = "records_to_postgrest"
SCRIPT_TASK_2 = "records_to_agol"
SCRIPT_TASK_3 = "agol_build_markings_segment_geometries"
SCRIPT_TASK_4 = "records_to_socrata"

APP_NAME = "signs-markings"
ENV = "prod"
POOL_KNACK = "knack_signs_markings"
POOL_POSTGREST = "atd_knack_postgrest_pool"
CONTAINER = "view_3100"

env_vars = Variable.get("atd_knack_services_postgrest", deserialize_json=True)
atd_knack_auth = Variable.get("atd_knack_auth", deserialize_json=True)
env_vars["KNACK_APP_ID"] = atd_knack_auth[APP_NAME][ENV]["app_id"]
env_vars["KNACK_API_KEY"] = atd_knack_auth[APP_NAME][ENV]["api_key"]
env_vars["AGOL_USERNAME"] = Variable.get("agol_username")
env_vars["AGOL_PASSWORD"] = Variable.get("agol_password")
env_vars["SOCRATA_API_KEY_ID"] = Variable.get(
    "atd_service_bot_socrata_api_key_id")
env_vars["SOCRATA_API_KEY_SECRET"] = Variable.get(
    "atd_service_bot_socrata_api_key_secret")
env_vars["SOCRATA_APP_TOKEN"] = Variable.get(
    "atd_service_bot_socrata_app_token")

with DAG(
        dag_id="atd_knack_markings_work_orders_jobs",
        description=
def get_reddit_posts(**context):
    reddit = CurrentDaysBands(
        client_id=Variable.get("REDDIT_CLIENT_ID"),
        client_secret=Variable.get("REDDIT_CLIENT_SECRET"),
        user_agent=Variable.get("REDDIT_USER_AGENT"))
    return reddit.get_bands(context['yesterday_ds'])
import os
from os.path import expanduser
from datetime import datetime, timedelta  # needed for start_date / retry_delay below

from airflow import DAG
from airflow.models import Variable
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.operators.idea_plugin import BigQueryTableModifiedSensor

home = expanduser("~")
STATE_PATH = '{0}/gcs/data/nwea_assessment_results_last_modified.text'.format(home)
IDEA2_API_KEY = Variable.get('idea2_api_key')

"""
DAG for updating Illuminate grade review data
"""

default_args = {
    "owner": "airflow",
    "depends_on_past": False,
    "start_date": datetime(2019, 3, 12),
    "email": ["*****@*****.**"],
    "email_on_failure": True,
    "email_on_retry": False,
    "retries": 1,
    "retry_delay": timedelta(minutes=2),
    "provide_context": True
    # 'queue': 'bash_queue',
from airflow import DAG
from airflow.models import Variable
from airflow.operators.python_operator import PythonOperator
from datetime import timedelta, datetime

from tasks.fetch_covid_cases import fetch_daily_data
from tasks.find_upload_percentage import find_percentage
from tasks.upload_csv_to_big_table import upload_csv_to_big_table

import yaml

# fetching constants from airflow
dag_config = Variable.get("bigquery_variables", deserialize_json=True)
BQ_CONN_ID = dag_config["bq_conn_id"]
BQ_PROJECT = dag_config["bq_project"]
BQ_TABLE = dag_config["bq_table"]
BQ_DATASET = dag_config["bq_dataset"]

# from config yaml file
with open("config/pipelines/covid_pipeline.yaml", 'r') as stream:
    try:
        dag_info = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)

# from library yaml file
with open("library/pipeline_defaults.yaml", 'r') as stream:
    try:
        dag_defaults = yaml.safe_load(stream)
    except yaml.YAMLError as exc:
        print(exc)
import os
import time

import boto3

import airflow.hooks.S3_hook
from airflow import DAG
from airflow.models import Variable
from airflow.operators import BashOperator
from datetime import datetime, timedelta
from airflow.operators.python_operator import PythonOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.hooks.postgres_hook import PostgresHook

## API key to connect to the weather API
## Alternatively, store it in a file and source it
API_KEY = Variable.get("weather_api_key")

# Following are defaults which can be overridden later on
# dag variables
default_args = {
    'owner': 'Srilekha',
    'depends_on_past': False,
    'start_date': datetime(2020, 11, 6),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
}
def AirflowGetVariableOrBaseCase(var, base):
    try:
        return Variable.get(var)
    except KeyError:
        return base
}

Q_PUBLIC = "poller_queue"
Q_PRIVATE = "formatting_queue"
Q_OSPF = "poller_queue"
Q_PING = "poller_queue"
PARENT_DAG_NAME = "UTILIZATION_KPI"

utilization_kpi_dag = DAG(dag_id=PARENT_DAG_NAME,
                          default_args=default_args,
                          schedule_interval='4-59/5 * * * *')

redis_hook_util_10 = RedisHook(redis_conn_id="redis_hook_util_10")
redis_hook_2 = RedisHook(redis_conn_id="redis_hook_2")
technologies = eval(Variable.get('utilization_kpi_technologies'))
machines = eval(Variable.get("system_config_no_o1"))
devices = eval(Variable.get('hostmk.dict.site_mapping'))
attributes = eval(Variable.get('utilization_kpi_attributes'))
all_sites = []


def init_kpi():
    logging.info("TODO : Check All vars and Airflow ETL Environment here")
    redis_hook_util_10.flushall("*")
    logging.info("Flushed all in redis_hook_util_10 connection")


def get_previous_device_states(device_type):
    prev_state = eval(redis_hook_2.get("kpi_ul_prev_state_%s" % device_type))
def get_sql_dir():
    sql_dir = Path(Variable.get("sql_dir"))
    if not sql_dir.exists():
        PKG_PARENT = Path(__file__).absolute().parent.parent.parent.parent
        sql_dir = PKG_PARENT / "airflow-core/sql"
    return sql_dir
def load_dimension_subdag(parent_dag_name, task_id, redshift_conn_id, *args, **kwargs):
    """
    A python function with arguments, which creates a dag
    :param parent_dag_name: imp ({parent_dag_name}.{task_id})
    :param task_id: imp {task_id}
    :param redshift_conn_id: {any connection id}
    :param args: {verbose}
    :param kwargs: {verbose and context variables}
    :return:
    """
    dag = DAG(f"{parent_dag_name}.{task_id}", **kwargs)

    copy_ports = StageToRedshiftOperator(task_id='copy_ports',
                                         dag=dag,
                                         redshift_conn_id="redshift",
                                         aws_credentials_id="aws_default",
                                         file='i94port.csv',
                                         delimiter=',',
                                         table='i94ports',
                                         s3_bucket=Variable.get("s3_bucket"),
                                         s3_key="csv",
                                         sql_stmt=SqlQueries.copy_csv_cmd,
                                         provide_context=True)

    copy_visa = StageToRedshiftOperator(task_id='copy_visa',
                                        dag=dag,
                                        redshift_conn_id="redshift",
                                        aws_credentials_id="aws_default",
                                        file='i94visa.csv',
                                        delimiter=',',
                                        table='i94visa',
                                        s3_bucket=Variable.get("s3_bucket"),
                                        s3_key="csv",
                                        sql_stmt=SqlQueries.copy_csv_cmd,
                                        provide_context=True)

    copy_modes = StageToRedshiftOperator(task_id='copy_modes',
                                         dag=dag,
                                         redshift_conn_id="redshift",
                                         aws_credentials_id="aws_default",
                                         file='i94mode.csv',
                                         delimiter=',',
                                         table='i94mode',
                                         s3_bucket=Variable.get("s3_bucket"),
                                         s3_key="csv",
                                         sql_stmt=SqlQueries.copy_csv_cmd,
                                         provide_context=True)

    copy_addr = StageToRedshiftOperator(task_id='copy_addr',
                                        dag=dag,
                                        redshift_conn_id="redshift",
                                        aws_credentials_id="aws_default",
                                        file='i94addr.csv',
                                        delimiter=',',
                                        table='i94addr',
                                        s3_bucket=Variable.get("s3_bucket"),
                                        s3_key="csv",
                                        sql_stmt=SqlQueries.copy_csv_cmd,
                                        provide_context=True)

    copy_country_codes = StageToRedshiftOperator(
        task_id='copy_country_codes',
        dag=dag,
        redshift_conn_id="redshift",
        aws_credentials_id="aws_default",
        file='i94cit&i94res.csv',
        delimiter=',',
        table='i94res',
        s3_bucket=Variable.get("s3_bucket"),
        s3_key="csv",
        sql_stmt=SqlQueries.copy_csv_cmd,
        provide_context=True)

    copy_cities_demographics = StageToRedshiftOperator(
        task_id='copy_cities_demographics',
        dag=dag,
        redshift_conn_id="redshift",
        aws_credentials_id="aws_default",
        file='us-cities-demographics.csv',
        delimiter=';',
        table='us_cities_demographics',
        s3_bucket=Variable.get("s3_bucket"),
        s3_key="csv",
        sql_stmt=SqlQueries.copy_csv_cmd,
        provide_context=True)

    copy_airports = StageToRedshiftOperator(
        task_id='copy_airports',
        dag=dag,
        redshift_conn_id="redshift",
        aws_credentials_id="aws_default",
        file='airport-codes_csv.csv',
        delimiter=',',
        table='airport_codes',
        s3_bucket=Variable.get("s3_bucket"),
        s3_key="csv",
        sql_stmt=SqlQueries.copy_csv_cmd,
        provide_context=True)

    def parquet_to_redshift(table, s3_bucket, s3_key, iam_role, sql_stmt,
                            redshift_conn_id, **kwargs):
        """
        This function reads parquet files and copies them to redshift schema.db
        :param table:
        :param s3_bucket:
        :param s3_key:
        :param iam_role:
        :param sql_stmt:
        :param redshift_conn_id:
        :param kwargs:
        :return:
        """
        redshift = PostgresHook(postgres_conn_id=redshift_conn_id)
        logging.info("Copying data from S3 to Redshift")
        s3_path = "s3://{}/{}".format(s3_bucket, s3_key)
        formatted_sql = sql_stmt.format(table, s3_path, iam_role)
        redshift.run(formatted_sql)

        aws_hook = AwsHook("aws_default")
        credentials = aws_hook.get_credentials()
        client = boto3.client('s3',
                              aws_access_key_id=credentials.access_key,
                              aws_secret_access_key=credentials.secret_key)
        objects_to_delete = client.list_objects(
            Bucket=Variable.get("s3_bucket"), Prefix="parquet")
        delete_keys = {'Objects': []}
        delete_keys['Objects'] = [
            {'Key': k}
            for k in [obj['Key'] for obj in objects_to_delete.get('Contents', [])]
        ]
        client.delete_objects(Bucket=Variable.get("s3_bucket"),
                              Delete=delete_keys)

    copy_immigration = PythonOperator(
        task_id='copy_immigration',
        python_callable=parquet_to_redshift,  # changed
        provide_context=True,
        op_kwargs={
            'table': "immigration",
            's3_bucket': Variable.get("s3_bucket"),
            's3_key': 'parquet',
            'iam_role': Variable.get('iam_role'),
            'sql_stmt': SqlQueries.copy_parquet_cmd,
            'redshift_conn_id': 'redshift'
        },
        dag=dag)

    copy_ports
    copy_visa
    copy_modes
    copy_addr
    copy_country_codes
    copy_airports
    copy_cities_demographics
    copy_immigration

    return dag
from postgres_check_operator import (
    PostgresMultiCheckOperator,
    COUNT_CHECK,
    GEO_CHECK,
)

from sql.wior import (
    DROP_COLS,
    SQL_DROP_TMP_TABLE,
    SQL_GEOM_VALIDATION,
    SQL_ADD_PK,
    SQL_SET_DATE_DATA_TYPES,
)

dag_id: str = "wior"
variables: Dict = Variable.get(dag_id, deserialize_json=True)
data_endpoint: Dict = variables["data_endpoints"]["wfs"]
tmp_dir: str = f"{SHARED_DIR}/{dag_id}"
data_file: str = f"{tmp_dir}/{dag_id}.geojson"
db_conn: DatabaseEngine = DatabaseEngine()
password: str = env("AIRFLOW_CONN_WIOR_PASSWD")
user: str = env("AIRFLOW_CONN_WIOR_USER")
base_url: str = URL(env("AIRFLOW_CONN_WIOR_BASE_URL"))
total_checks: list = []
count_checks: list = []
geo_checks: list = []
to_zone: Optional[tzinfo] = tz.gettz("Europe/Amsterdam")


class DataSourceError(Exception):
    """Custom exception for a data source that is not available."""
import pathlib
from pathlib import Path

p = os.path.abspath(
    str(pathlib.Path(__file__).parent.absolute()) + '/../../python/')
if p not in sys.path:
    sys.path.append(p)

from export_sequences_without_premsa import export_sequences
from store_premsa import store_premsa_file
from premsa_log_parse import mark_troubled
from mark_premsa_dupes import mark_premsa_dupes
from get_raw_duplicates import write_raw_duplicates
from mark_duplicates import mark_duplicates

WORKING_DIR = Variable.get("WORKING_DIR")
DATE_STRING = datetime.date.today().strftime('%Y-%m-%d')

default_args = {
    'owner': 'sweaver',
    'depends_on_past': False,
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'params': {
        'working_dir': WORKING_DIR,
        'num_procs': 16,
        'python': "/data/shares/veg/SARS-CoV-2/SARS-CoV-2-devel/env/bin/python3",
        'hyphy': "/data/shares/veg/SARS-CoV-2/hyphy/hyphy",
        'hyphy_mpi': "/data/shares/veg/SARS-CoV-2/hyphy/HYPHYMPI",
def execute(self, context):
    if self.provide_context:
        context.update(self.op_kwargs)

    log_date = context['ti'].execution_date.strftime('%Y-%m-%d')
    if self.log_date_fun is not None:
        log_date = self.log_date_fun(context)
    if self.log_hour_fun is not None:
        self.log_hour = self.log_hour_fun(context)

    current_dir = os.getcwd()
    s3_bucket_with_env = tools.s3_name_generator(self.s3_bucket, "-json", "-staging")
    s3_conn_id_with_env = tools.s3_name_generator(self.s3_conn_id, "_json", "_staging")
    s3_key_path = "druid-json-template" + "/" + self.template
    self.download_template_file(s3_conn_id_with_env, s3_bucket_with_env,
                                s3_key_path, current_dir + "/json")

    druid_host, druid_port = Variable.get("druid_overlord").split(":")
    key, secret = GenericHook(s3_conn_id_with_env).get_credentials()
    druid = DruidAccess(druid_host, druid_port, "", "", self.data_source)
    s3 = S3Access(key, secret, False)

    if self.folder is None:
        self.folder = tools.s3_name_generator(self.prefix, "-prod", "-staging")

    s3_file_location = ""
    if self.aggregate == "DAILY":
        s3_file_location = "s3://{bucket}/{folder}/{topic}/{day_key}={log_date}/".format(
            bucket=s3_bucket_with_env,
            folder=self.folder,
            topic=self.topic,
            day_key=self.date_key,
            log_date=log_date)
    if self.aggregate == "HOURLY":
        s3_file_location = "s3://{bucket}/{folder}/{topic}/{day_key}={log_date}/{hour_key}={log_hour}/".format(
            bucket=s3_bucket_with_env,
            folder=self.folder,
            topic=self.topic,
            day_key=self.date_key,
            log_date=log_date,
            hour_key=self.hour_key,
            log_hour=self.log_hour)

    logging.info("Launching importer for %s.." % s3_file_location)
    s3_files = s3.get_filenames(s3_file_location)
    logging.info("Files Name " + ','.join(s3_files))

    running_tasks = []
    s3_files = ['"' + f + '"' for f in sorted(s3_files)]
    task = Task(s3_files)

    log_timestamp = log_date + ' ' + self.log_hour + ":00:00"
    log_timestamp_ts = (parser.parse(log_timestamp)).isoformat()
    next_log_timestamp_ts = (parser.parse(log_timestamp) +
                             datetime.timedelta(days=1)).isoformat()
    if self.aggregate == "HOURLY":
        next_log_timestamp_ts = (parser.parse(log_timestamp) +
                                 datetime.timedelta(hours=1)).isoformat()

    logging.info("Handling task %r" % task)
    task.id = druid.upload(log_timestamp_ts, next_log_timestamp_ts, s3_files,
                           self.template)
    logging.info("Uploading task to druid and task id is %r" % task.id)
    running_tasks.append(task)

    # Cleaning and waiting
    druid.clean_tasks(running_tasks)
    while len(running_tasks) >= self.slots:
        logging.info("Waiting for %r tasks" % len(running_tasks))
        time.sleep(10)
        running_tasks = druid.clean_tasks(running_tasks)

    while len(running_tasks) > 0:
        logging.info("Waiting for finalization of %r tasks" % len(running_tasks))
        time.sleep(10)
        running_tasks = druid.clean_tasks(running_tasks)

    logging.info("Importing done..")
def GenerateTestArgs(**kwargs):
    """Loads the configuration that will be used for this Iteration."""
    conf = kwargs['dag_run'].conf
    if conf is None:
        conf = dict()

    # Airflow gives the execution date when the job is supposed to be run;
    # however, we don't backfill and only need to run one build, therefore we
    # use the current date instead of the date that is passed in.
    # date = kwargs['execution_date']
    date = datetime.datetime.now()
    timestamp = time.mktime(date.timetuple())

    # Monthly releases started in Nov 2017 with 0.3.0, so minor is # of months
    # from Aug 2017.
    minor_version = (date.year - 2017) * 12 + (date.month - 1) - 7
    major_version = AirflowGetVariableOrBaseCase('major_version', 0)
    # This code gets information about the latest released version so we know
    # what version number to use for this round.
    r_minor = int(AirflowGetVariableOrBaseCase('released_version_minor', 0))
    r_patch = int(AirflowGetVariableOrBaseCase('released_version_patch', 0))
    # If we have already released a monthly for this month then bump
    # the patch number for the remainder of the month.
    if r_minor == minor_version:
        patch = r_patch + 1
    else:
        patch = 0
    # If the version is overridden then we should use it, otherwise we use its
    # default or monthly value.
    version = conf.get('VERSION')
    if monthly and not version:
        version = '{}.{}.{}'.format(major_version, minor_version, patch)

    default_conf = environment_config.get_airflow_config(
        version,
        timestamp,
        major=major_version,
        minor=minor_version,
        patch=patch,
        date=date.strftime('%Y%m%d'),
        rc=date.strftime('%H-%M'))
    config_settings = dict(VERSION=default_conf['VERSION'])
    config_settings_name = [
        'PROJECT_ID',
        'MFEST_URL',
        'MFEST_FILE',
        'GCS_STAGING_BUCKET',
        'SVC_ACCT',
        'GITHUB_ORG',
        'GITHUB_REPO',
        'GCS_GITHUB_PATH',
        'TOKEN_FILE',
        'GCR_STAGING_DEST',
        'GCR_RELEASE_DEST',
        'GCS_MONTHLY_RELEASE_PATH',
        'DOCKER_HUB',
        'GCS_BUILD_BUCKET',
        'RELEASE_PROJECT_ID',
    ]
    for name in config_settings_name:
        config_settings[name] = conf.get(name) or default_conf[name]

    if monthly:
        config_settings['MFEST_COMMIT'] = conf.get(
            'MFEST_COMMIT') or Variable.get('latest_sha')
        gcs_path = conf.get('GCS_MONTHLY_STAGE_PATH')
        if not gcs_path:
            gcs_path = default_conf['GCS_MONTHLY_STAGE_PATH']
    else:
        config_settings['MFEST_COMMIT'] = conf.get(
            'MFEST_COMMIT') or default_conf['MFEST_COMMIT']
        gcs_path = conf.get('GCS_DAILY_PATH') or default_conf['GCS_DAILY_PATH']

    config_settings['GCS_STAGING_PATH'] = gcs_path
    config_settings['GCS_BUILD_PATH'] = '{}/{}'.format(
        config_settings['GCS_BUILD_BUCKET'], gcs_path)
    config_settings['GCS_RELEASE_TOOLS_PATH'] = '{}/release-tools/{}'.format(
        config_settings['GCS_BUILD_BUCKET'], gcs_path)
    config_settings['GCS_FULL_STAGING_PATH'] = '{}/{}'.format(
        config_settings['GCS_STAGING_BUCKET'], gcs_path)
    config_settings['ISTIO_REPO'] = 'https://github.com/{}/{}.git'.format(
        config_settings['GITHUB_ORG'], config_settings['GITHUB_REPO'])

    return config_settings
from airflow import DAG
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator
from airflow.operators.python_operator import PythonOperator
from datetime import datetime, timedelta
from airflow.models import Variable

SRC = Variable.get("SRC")
#SRC='./'
COUNTRY = Variable.get("COUNTRY")
#COUNTRY='PL'

dag = DAG('project-workflow',
          description='Project Workflow DAG',
          schedule_interval='*/5 0 * * *',
          start_date=datetime(2017, 7, 1),
          catchup=False)

xlsx_to_csv_task = BashOperator(
    task_id='xlsx_to_csv',
    bash_command='"$src"/test.sh "$country" 2nd_param_xlsx',
    env={'src': SRC, 'country': COUNTRY},
    dag=dag)

merge_command = SRC + '/test.sh ' + COUNTRY + ' 2nd_param_merge'
merge_task = BashOperator(
    task_id='merge',
    bash_command=merge_command,
    dag=dag)

my_templated_command = """
{{ params.src }}/test.sh {{ params.country }} 2nd_param_cleansing
def test_variable_set_get_round_trip(self):
    Variable.set("tested_var_set_id", "Monday morning breakfast")
    assert "Monday morning breakfast" == Variable.get("tested_var_set_id")
def set_call(*args, **context):
    group = Variable.get('group')
    if group == 'night_shift':
        context['task_instance'].xcom_push(key='recipient', value='0011223344')
    else:
        context['task_instance'].xcom_push(key='recipient', value='0011223344')
# # create formatter and add it to the handlers
# formatter = logging.Formatter('%(asctime)s - %(process)s - %(module)s - %(funcName)s - %(levelname)s - %(message)s')
# fh.setFormatter(formatter)
# ch.setFormatter(formatter)
# # add the handlers to the logger
# logger.addHandler(fh)
# logger.addHandler(ch)

import datetime

from airflow import DAG
from airflow.models import Variable
from airflow.operators.bash_operator import BashOperator
from airflow.operators.python_operator import PythonOperator
from openpyxl import Workbook

project_folder = Variable.get("project_folder")
order_output_folder = Variable.get("order_output_folder")
store_order_file = Variable.get("store_order_file_name")

default_args = {
    'owner': 'Carrefour',
    'start_date': datetime.datetime(2019, 8, 19),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'end_date': datetime.datetime(2030, 1, 1),
}

dag = DAG('get_sales',
def test_access_var():
    my_var = Variable.get("hsfjskdfjhk")
    print("my var message : {}".format(my_var))
    return "Access Var Success!"
    'email': ['*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': False,
}

dag = airflow.DAG('dm_oride_passenger_base_multi_cube',
                  schedule_interval="45 00 * * *",
                  default_args=args)

##----------------------------------------- Variables ---------------------------------------##

db_name = "oride_dw"
table_name = "dm_oride_passenger_base_multi_cube"

##----------------------------------------- Dependencies ---------------------------------------##

# fetch the variable
code_map = eval(Variable.get("sys_flag"))

# check for ufile (CDH environment)
if code_map["id"].lower() == "ufile":
    # depends on the previous day's partition
    dwm_oride_passenger_order_base_di_prev_day_task = UFileSensor(
        task_id='dwm_oride_passenger_order_base_di_prev_day_task',
        filepath='{hdfs_path_str}/dt={pt}/_SUCCESS'.format(
            hdfs_path_str=
            "oride/oride_dw/dwm_oride_passenger_order_base_di/country_code=NG",
            pt='{{ds}}'),
        bucket_name='opay-datalake',
        poke_interval=60,  # when the dependency is not met, check its status once a minute
        dag=dag)

    # path
    hdfs_path = "ufile://opay-datalake/oride/oride_dw/" + table_name
# This DAG is configured to print the date and sleep for 5 seconds.
# However, it is configured to fail (see the expect_failure bash_command)
# and send an e-mail to your specified email on task failure.
from airflow import DAG
from airflow.models import Variable
from airflow.operators.bash_operator import BashOperator
from datetime import datetime, timedelta

YESTERDAY = datetime.combine(
    datetime.today() - timedelta(days=1), datetime.min.time())

default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': YESTERDAY,
    'email': [Variable.get('email')],
    'email_on_failure': True,
    'email_on_retry': False,
    'retries': 0,
}

with DAG('hello_world_email_bonus', default_args=default_args) as dag:
    t1 = BashOperator(task_id='print_date', bash_command='date', dag=dag)
    t2 = BashOperator(task_id='expect_failure', bash_command='exit 1', dag=dag)
    t1 >> t2
    # 'on_success_callback': some_other_function,
    # 'on_retry_callback': another_function,
    # 'sla_miss_callback': yet_another_function,
    # 'trigger_rule': 'all_success'
}

dag = DAG(
    "3_find_neighbours",
    default_args=default_args,
    description="Find neighbouring waves in a timespace",
    schedule_interval=timedelta(days=1),
)

rootdir = "/app/data"
filename = Variable.get("filename")
if filename == "all":
    files = [file for file in os.listdir(rootdir) if file.endswith(".tif")]
else:
    files = [filename]

tolerance_xy = Variable.get("tolerance_xy")
tolerance_t = Variable.get("tolerance_t")
intersect_threshold = Variable.get("intersection_threshold")

for file in files:
    filename = file
    directory = filename.split(".")[0]
    directory = process_task_name(directory)
from airflow import DAG
from datetime import datetime, timedelta
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.operators.dummy_operator import DummyOperator
from kubernetes.client import models as k8s
from airflow.utils.dates import days_ago
from airflow.models import Variable
from airflow.operators.http_operator import SimpleHttpOperator
import urllib.request
import json

default_args = {
    'owner': 'datagap'
}

basePath = Variable.get("permit_data_base_url")
templateUrl = Variable.get("permit_data_weekly_index_url")
permitDataSource = Variable.get("permit_datasource")


def downloadTemplate(templateUrl):
    request = urllib.request.urlopen(templateUrl)
    response = request.read().decode('utf-8')
    return response


def replace(jsonContent, dataSource, interval, basePath, date, market):
    result = json.loads(jsonContent)

    # base data source
    result['spec']['ioConfig']['inputSource']['dataSource'] = dataSource

    # ingest data url
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
    'queue': queue,
}

# initiate the DAG
dag = DAG(
    dag_name,
    default_args=default_args,
    description='Running multiple HEC-HMSs using Google Kubernetes Engine',
    schedule_interval=schedule_interval)

hec_config = Variable.get(hec_config_var, deserialize_json=True)

for i in range(parallel_runs):
    generate_run_id = PythonOperator(
        task_id='gen-run-id',
        python_callable=af_kube_utils.generate_random_run_id,
        op_args=[run_id_prefix],
        op_kwargs={"suffix": "%04d" % i},
        provide_context=True,
        dag=dag)

    logging.info('Initializing hec-hms pod')
    hec_pod = get_base_pod()
    hec_pod.metadata.name = 'kube-pod-{{ ti.xcom_pull(task_ids=\'gen-run-id\') }}'
    hec_pod.spec.containers[0].name = 'kube-cont-{{ ti.xcom_pull(task_ids=\'gen-run-id\') }}'
    StructType, StringType, DoubleType, IntegerType
)
from pyspark.sql.functions import udf, monotonically_increasing_id

from helpers import (
    practice_prescribing_schema, chemicals_schema, practices_schema,
    practice_size_schema, bnf_codes_schema
)

S3_STAGING = Variable.get('s3_output_bucket')
S3_RAW_DATA = Variable.get('s3_input_bucket')
aws_access_key_id = Variable.get('aws_access_key_id')
aws_secret_key = Variable.get('aws_secret_access_key')


class PreprocessToS3Operator(BaseOperator):
    ui_color = '#80BD9E'

    @apply_defaults
    def __init__(self,
                 schema="",
                 s3_bucket="",
                 s3_key="",
                 filename="",
def test_get_non_existing_var_should_not_deserialize_json_default(self):
    default_value = "}{ this is a non JSON default }{"
    assert default_value == Variable.get("thisIdDoesNotExist",
                                         default_var=default_value,
                                         deserialize_json=True)
""" Working with Variables. Doc: https://airflow.apache.org/concepts.html?highlight=variable#variables """ from airflow import DAG from airflow.operators.bash_operator import BashOperator from datetime import datetime from airflow.models import Variable text_variable = Variable.get("user") # Getting a JSON var doesn't work () #json_variable = Variable.get("json_var", deserialize_json = True) default_args = { 'start_date': datetime.now() } dag = DAG('varialbes', default_args=default_args) text_message = f"echo 'The user variable is {text_variable}'" #json_message = f"echo 'The json_var={json_variable}'" t1 = BashOperator( task_id='text_variable', bash_command=text_message, dag=dag)
def test_get_non_existing_var_should_return_default(self):
    default_value = "some default val"
    assert default_value == Variable.get("thisIdDoesNotExist",
                                         default_var=default_value)
def test_variable_set_get_round_trip_json(self):
    value = {"a": 17, "b": 47}
    Variable.set("tested_var_set_id", value, serialize_json=True)
    assert value == Variable.get("tested_var_set_id", deserialize_json=True)