import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from airflow.operators.dummy_operator import DummyOperator


def test_dag_function():
    logging.info("test_dag_function is running...")

test_dag = DAG(
    dag_id='test_dag',
    start_date=datetime.datetime.now())

start_operator = DummyOperator(task_id='Begin_execution', dag=test_dag)

# create_tables_in_redshift =  DummyOperator(task_id='create_tables_in_redshift',  dag=test_dag)

stage_events_to_redshift = DummyOperator(task_id='stage_events', dag=test_dag)
stage_songs_to_redshift = DummyOperator(task_id='stage_songs', dag=test_dag)

load_songplays_table = DummyOperator(
    task_id='Load_songplays_fact_table',
    dag=test_dag
)

load_user_dimension_table = DummyOperator(
    task_id='Load_user_dim_table',
    dag=test_dag
)

load_song_dimension_table = DummyOperator(
Example #2
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

import os
from datetime import datetime

from airflow import DAG
from airflow.operators.dummy import DummyOperator

DEFAULT_DATE = datetime(2016, 1, 1)

args = {
    'owner': 'airflow',
    'start_date': DEFAULT_DATE,
}

dag = DAG(dag_id='test_om_failure_callback_dag', default_args=args)


def write_data_to_callback(*arg, **kwargs):  # pylint: disable=unused-argument
    with open(os.environ.get('AIRFLOW_CALLBACK_FILE'), "w+") as f:
        f.write("Callback fired")


task = DummyOperator(
    task_id='test_om_failure_callback_task', dag=dag, on_failure_callback=write_data_to_callback
)
Example #3
                                          'example.json')

SNOWFLAKE_SELECT_SQL = f"SELECT * FROM {SNOWFLAKE_SAMPLE_TABLE} LIMIT 100;"
SNOWFLAKE_SLACK_SQL = f"SELECT O_ORDERKEY, O_CUSTKEY, O_ORDERSTATUS FROM {SNOWFLAKE_SAMPLE_TABLE} LIMIT 10;"
SNOWFLAKE_SLACK_MESSAGE = "Results in an ASCII table:\n" \
                          "```{{ results_df | tabulate(tablefmt='pretty', headers='keys') }}```"
SNOWFLAKE_CREATE_TABLE_SQL = f"CREATE TRANSIENT TABLE IF NOT EXISTS {SNOWFLAKE_LOAD_TABLE}(data VARIANT);"

default_args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}

dag = DAG(
    'example_snowflake',
    default_args=default_args,
    tags=['example'],
)

select = SnowflakeOperator(
    task_id='select',
    snowflake_conn_id=SNOWFLAKE_CONN_ID,
    sql=SNOWFLAKE_SELECT_SQL,
    dag=dag,
)

slack_report = SnowflakeToSlackOperator(task_id="slack_report",
                                        sql=SNOWFLAKE_SLACK_SQL,
                                        slack_message=SNOWFLAKE_SLACK_MESSAGE,
                                        snowflake_conn_id=SNOWFLAKE_CONN_ID,
                                        slack_conn_id=SLACK_CONN_ID,
Example #4
import datetime
import logging

from airflow import DAG
from airflow.operators.python_operator import PythonOperator


#
# TODO: Define a function for the PythonOperator to call and have it log something
#
# def my_function():
#     logging.info(<REPLACE>)
def routine():
    logging.info('HELLO WORLD')

dag = DAG(
        'lesson1.exercise1',
        start_date=datetime.datetime.now())

#
# TODO: Uncomment the operator below and replace the arguments labeled <REPLACE> below
#
greet_task = PythonOperator(
    task_id='routine_task',
    python_callable=routine,
    dag=dag)

#greet_task = PythonOperator(
#    task_id="<REPLACE>",
#    python_callable=<REPLACE>,
Example #5
# AWS_KEY = os.environ.get('AWS_KEY')
# AWS_SECRET = os.environ.get('AWS_SECRET')

default_args = {
    'catchup': False,
    'depends_on_past': False,
    'email_on_failure': False,
    'owner': 'udacity',
    'retries': 3,
    'retry_delay': timedelta(minutes=5),
    'start_date': datetime.now()
}

dag = DAG('udac_example_dag',
          default_args=default_args,
          description='Load and transform data in Redshift with Airflow',
          schedule_interval='@daily')

start_operator = DummyOperator(task_id='Begin_execution', dag=dag)

stage_events_to_redshift = StageToRedshiftOperator(
    task_id='stage_events',
    aws_conn_id='aws_conn_id',
    redshift_conn_id='redshift_conn_id',
    iam_role="arn:aws:iam::850743350707:role/udacity-redshift-role",
    s3_bucket='udacity-dend',
    s3_key='log-data',
    table='staging_events',
    dag=dag)
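
# The StageToRedshiftOperator above is a project-specific custom operator that is not
# shown in this snippet. As a rough sketch (an assumption, not the operator's actual
# SQL), it typically renders a Redshift COPY statement from those parameters:
#
#   COPY staging_events
#   FROM 's3://udacity-dend/log-data'
#   IAM_ROLE 'arn:aws:iam::850743350707:role/udacity-redshift-role'
#   FORMAT AS JSON 'auto';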

stage_songs_to_redshift = StageToRedshiftOperator(
Example #6
"""
Executing tasks at a particular time of the day using sensor operator.
"""
from airflow import DAG
from airflow.operators import BashOperator, TimeSensor
from datetime import datetime, timedelta, time

default_args = {
    'owner': 'Samarth',
    'start_date': datetime(2016, 3, 15, 12),
}

# "schedule_interval" is your cron expression you can write any cron expression like unix cron.
dag = DAG('airflow_task_with_time_sensor',
          default_args=default_args,
          schedule_interval="1 * * * *")

bash_task = BashOperator(
    task_id='dependency_for_sensor',
    bash_command='echo "Sensor would only be enabled after I am done!"',
    dag=dag)

# The sensor operator takes "target_time", which is a specific time of day irrespective of the date.
# The sensor succeeds once the target time has passed; in this case, shortly after 10:55 in the morning.
sensor_task = TimeSensor(task_id='sensor_task',
                         target_time=time(10, 55, 1, 1),
                         dag=dag)

post_sensor_task = BashOperator(
    task_id='post_sensor_task',
    bash_command='echo "I am done, it means sensor has done its job."',
Example #7
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
"""
Example DAG demonstrating the usage of BranchDayOfWeekOperator.
"""
import pendulum

from airflow import DAG
from airflow.operators.dummy import DummyOperator
from airflow.operators.weekday import BranchDayOfWeekOperator

with DAG(
        dag_id="example_weekday_branch_operator",
        start_date=pendulum.datetime(2021, 1, 1, tz="UTC"),
        catchup=False,
        tags=["example"],
        schedule_interval="@daily",
) as dag:
    # [START howto_operator_day_of_week_branch]
    dummy_task_1 = DummyOperator(task_id='branch_true', dag=dag)
    dummy_task_2 = DummyOperator(task_id='branch_false', dag=dag)

    branch = BranchDayOfWeekOperator(
        task_id="make_choice",
        follow_task_ids_if_true="branch_true",
        follow_task_ids_if_false="branch_false",
        week_day="Monday",
    )

    # Run dummy_task_1 if branch executes on Monday
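    # Dependency wiring assumed from the stock Airflow example (not shown in this
    # snippet): the branch runs first, then only the task matching the weekday runs.
    branch >> [dummy_task_1, dummy_task_2]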

Example #8
# ================ DAG definition ================
default_args = {
    'owner': 'airflow',
    'start_date': datetime(year=2020, month=4, day=14),
    'depends_on_past': False,
    'email_on_failure': False,
    'retries': 3,
    'retry_delay': timedelta(minutes=5)
}

dag = DAG(
    dag_id=SELF_DAG_ID,
    description='Hourly monitor of DAGs',
    default_args=default_args,
    schedule_interval='@hourly',
    catchup=False
)

# ================ task definitions ================
start_task = DummyOperator(task_id='start', dag=dag)
end_task = DummyOperator(task_id='end', dag=dag)
check_statuses_task = DummyOperator(task_id='check_statuses', dag=dag)

trigger_condition_table_dag_task = TriggerDagRunOperator(
    task_id='trigger_condition_table_dag',
    trigger_dag_id='condition_table_dag',
    dag=dag
)

condition_table_sensor_task = PythonOperator(
Example #9
db_conn = DatabaseEngine()
total_checks = []
count_checks = []
geo_checks = []
check_name = {}


# needed to put quotes on elements in geotypes for SQL_CHECK_GEO
def quote(instr):
    return f"'{instr}'"
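
# A hypothetical use of the filter inside a templated SQL check, assuming a
# list-valued `geotypes` parameter (illustrative, not taken from the source):
#   ... WHERE geometrie_type IN ({{ params.geotypes | map('quote') | join(', ') }})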


with DAG(
    dag_id,
    description="locaties veilige-afstandobjecten zoals Vuurwerkopslag, Wachtplaats,"
    "Bunkerschip, Sluis, Munitieopslag, Gasdrukregel -en meetstation",
    default_args=default_args,
    user_defined_filters=dict(quote=quote),
    template_searchpath=["/"],
) as dag:

    # 1. Post info message on slack
    slack_at_start = MessageOperator(
        task_id="slack_at_start",
        http_conn_id="slack",
        webhook_token=slack_webhook_token,
        message=f"Starting {dag_id} ({DATAPUNT_ENVIRONMENT})",
        username="******",
    )

    # 2. Create temp directory to store files
    mkdir = BashOperator(task_id="mkdir", bash_command=f"mkdir -p {tmp_dir}")
Example #10
            '-s',
            "{{ task_instance.xcom_pull('parse_request', key='s3location') }}",
            '-d',
            's3://demo-wcd/',
            '-c',
            'job',
            '-m',
            'append',
            '--input-options',
            'header=true'
        ]
    }
}]

dag = DAG('emr_job_flow_manual_steps_dag',
          default_args=DEFAULT_ARGS,
          dagrun_timeout=timedelta(hours=2),
          schedule_interval='0 3 * * *')

parse_request = PythonOperator(task_id='parse_request',
                               provide_context=True,
                               python_callable=retrieve_s3_file,
                               dag=dag)

step_adder = EmrAddStepsOperator(task_id='add_steps',
                                 job_flow_id=CLUSTER_ID,
                                 aws_conn_id='aws_default',
                                 steps=SPARK_TEST_STEPS,
                                 dag=dag)

step_checker = EmrStepSensor(
    task_id='watch_step',
Example #11
from airflow import DAG
from airflow.operators.python_operator import PythonOperator
from datetime import datetime


def print_hello():
    return 'Hello world!'


dag = DAG('hello_world',
          description='Simple tutorial DAG',
          start_date=datetime(2020, 1, 1),
          schedule_interval=None)

hello_operator = PythonOperator(task_id='hello_task',
                                python_callable=print_hello,
                                dag=dag)
Example #12
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG(TEST_DAG_ID, default_args=args)
        self.dag = dag
Example #13
    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG(TEST_DAG_ID, default_args=args)
        self.dag = dag

        rows = [
            (1880, "John", 0.081541, "boy"),
            (1880, "William", 0.080511, "boy"),
            (1880, "James", 0.050057, "boy"),
            (1880, "Charles", 0.045167, "boy"),
            (1880, "George", 0.043292, "boy"),
            (1880, "Frank", 0.02738, "boy"),
            (1880, "Joseph", 0.022229, "boy"),
            (1880, "Thomas", 0.021401, "boy"),
            (1880, "Henry", 0.020641, "boy"),
            (1880, "Robert", 0.020404, "boy"),
            (1880, "Edward", 0.019965, "boy"),
            (1880, "Harry", 0.018175, "boy"),
            (1880, "Walter", 0.014822, "boy"),
            (1880, "Arthur", 0.013504, "boy"),
            (1880, "Fred", 0.013251, "boy"),
            (1880, "Albert", 0.012609, "boy"),
            (1880, "Samuel", 0.008648, "boy"),
            (1880, "David", 0.007339, "boy"),
            (1880, "Louis", 0.006993, "boy"),
            (1880, "Joe", 0.006174, "boy"),
            (1880, "Charlie", 0.006165, "boy"),
            (1880, "Clarence", 0.006165, "boy"),
            (1880, "Richard", 0.006148, "boy"),
            (1880, "Andrew", 0.005439, "boy"),
            (1880, "Daniel", 0.00543, "boy"),
            (1880, "Ernest", 0.005194, "boy"),
            (1880, "Will", 0.004966, "boy"),
            (1880, "Jesse", 0.004805, "boy"),
            (1880, "Oscar", 0.004594, "boy"),
            (1880, "Lewis", 0.004366, "boy"),
            (1880, "Peter", 0.004189, "boy"),
            (1880, "Benjamin", 0.004138, "boy"),
            (1880, "Frederick", 0.004079, "boy"),
            (1880, "Willie", 0.00402, "boy"),
            (1880, "Alfred", 0.003961, "boy"),
            (1880, "Sam", 0.00386, "boy"),
            (1880, "Roy", 0.003716, "boy"),
            (1880, "Herbert", 0.003581, "boy"),
            (1880, "Jacob", 0.003412, "boy"),
            (1880, "Tom", 0.00337, "boy"),
            (1880, "Elmer", 0.00315, "boy"),
            (1880, "Carl", 0.003142, "boy"),
            (1880, "Lee", 0.003049, "boy"),
            (1880, "Howard", 0.003015, "boy"),
            (1880, "Martin", 0.003015, "boy"),
            (1880, "Michael", 0.00299, "boy"),
            (1880, "Bert", 0.002939, "boy"),
            (1880, "Herman", 0.002931, "boy"),
            (1880, "Jim", 0.002914, "boy"),
            (1880, "Francis", 0.002905, "boy"),
            (1880, "Harvey", 0.002905, "boy"),
            (1880, "Earl", 0.002829, "boy"),
            (1880, "Eugene", 0.00277, "boy"),
        ]

        self.env_vars = {
            'AIRFLOW_CTX_DAG_ID': 'test_dag_id',
            'AIRFLOW_CTX_TASK_ID': 'test_task_id',
            'AIRFLOW_CTX_EXECUTION_DATE': '2015-01-01T00:00:00+00:00',
            'AIRFLOW_CTX_DAG_RUN_ID': '55',
            'AIRFLOW_CTX_DAG_OWNER': 'airflow',
            'AIRFLOW_CTX_DAG_EMAIL': '*****@*****.**',
        }

        from airflow.hooks.mysql_hook import MySqlHook
        with MySqlHook().get_conn() as cur:
            cur.execute('''
            CREATE TABLE IF NOT EXISTS baby_names (
              org_year integer(4),
              baby_name VARCHAR(25),
              rate FLOAT(7,6),
              sex VARCHAR(4)
            )
            ''')

            # Keep the inserts inside the same block so the cursor is still open.
            for row in rows:
                cur.execute("INSERT INTO baby_names VALUES(%s, %s, %s, %s);", row)
Example #14
            ret[r['name']] = m[1]
    print(ret)
    return ret


def print_context(ds, **kwargs):
    logging.info("kwargs: %s" % kwargs)
    logging.info("ds: %s" % ds)
    ti = kwargs['ti']
    return 'Whatever you return gets printed in the logs ' + str(
        ti.xcom_pull(key=None, task_ids=['get_data']))


dag = DAG('mymean',
          description='Simple Mean of the temperature from last year',
          default_args={'owner': 'iiot-book'},
          schedule_interval='* * * * 0',
          start_date=datetime.datetime(2018, 6, 21),
          catchup=False)

kairos_operator = KairosDBOperator(task_id='get_data',
                                   query={
                                       "metrics": [{
                                           "tags": {},
                                           "name":
                                           "device0.my.measure.temperature",
                                           "aggregators": [{
                                               "name": "scale",
                                               "factor": "1.0"
                                           }]
                                       }],
                                       "plugins": [],
Example #15
from airflow import DAG
import datetime as dt
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from datetime import timedelta

default_args = {
    'owner': 'dl-predictor',
    'depends_on_past': False,
    'start_date': dt.datetime(2020, 9, 21),
    'retries': 0  # ,
    # 'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'dl_pre_processing',
    default_args=default_args,
    schedule_interval=None,
)


def sparkOperator(
        file,
        task_id,
        **kwargs
):
    return SparkSubmitOperator(
        application='/home/airflow/airflow/predictor_dl_model/pipeline/{}'.format(file),
        application_args=['/home/airflow/airflow/predictor_dl_model/config.yml'],
        conn_id='spark_default',
        conf={'spark.driver.maxResultSize': '8g'},
        driver_memory='32G',
Example #16
    'email': ['*****@*****.**', '*****@*****.**'],  # Emails that receive notifications about the DAG
    'email_on_failure': False,  # Whether to notify on every failure that occurs in the DAG
    'email_on_retry': False,  # Whether to notify again when a failed task is retried
    'retries': 1,  # Number of retry attempts in case of failure
    'retry_delay': timedelta(minutes=1)  # Wait time between one retry and the next
}

# Defining the DAG (flow)
dag = DAG(
    "treino-05-docker",  # DAG name
    description="Paralelismos",  # Information about the DAG
    default_args=default_args,  # Arguments defined in the dict above
    schedule_interval=timedelta(minutes=5)  # Interval between runs
)

# BashOperator - marks the start of execution
start_processing = BashOperator(task_id='start_processing',
                                bash_command='echo "Start processing!" ',
                                dag=dag)

# Task that downloads the data
get_data = BashOperator(
    task_id='get-data',
    bash_command=
    'curl http://download.inep.gov.br/microdados/Enade_Microdados/microdados_enade_2019.zip -o /usr/local/airflow/data/microdados_enade_2019.zip',
    dag=dag)
Example #17
                 my_templated_field,
                 *args,
                 **kwargs):
        """
        :param my_templated_field: The value of the templated field.
        :type my_templated_field: string
        """
        super(MyCustomOperator, self).__init__(*args, **kwargs)
        self.my_templated_field = my_templated_field  # This will be replaced at execution time as this field is declared in the class property 'template_fields'.

    def execute(self, context):
        # List objects.
        self.log.info('MyCustomOperator executed with value for the templated field : %s', self.my_templated_field)

        # Do stuff...


with DAG(dag_id='09_custom_operators',
         schedule_interval='*/10 * * * *',
         catchup=False,
         default_args=default_args) as dag:
    using_static_value = MyCustomOperator(
        task_id='using_static_value',
        my_templated_field='Plouf'
    )

    using_templated_value = MyCustomOperator(
        task_id='using_templated_value',
        my_templated_field='Dag-run-id=[{{ run_id }}]'
    )
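
# For reference, a minimal sketch of the class declaration that the __init__ and
# execute methods above belong to; the exact template_fields tuple is an assumption:
#
#   class MyCustomOperator(BaseOperator):
#       # Declaring the field here is what makes Airflow render it with Jinja.
#       template_fields = ('my_templated_field',)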
Example #18
default_args = {
    'owner': 'airflow',
    'wait_for_downstream': False,
    'depends_on_past': False,
    'start_date': datetime(2019, 1, 1),
    'end_date': datetime(2019, 12, 31),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'sla': timedelta(minutes=30),
    'retry_delay': timedelta(minutes=5)
}

with DAG('Postgres2Postgres',
         default_args=default_args,
         schedule_interval='@daily') as dag:

    def transfer_daily_data(ds, **kwargs):

        src_engine = PostgresHook(
            postgres_conn_id='postgres_local').get_sqlalchemy_engine()
        tgt_engine = PostgresHook(
            postgres_conn_id='postgres_local').get_sqlalchemy_engine()

        execution_date = kwargs['execution_date'].strftime('%Y-%m-%d')
        next_date = (kwargs['execution_date'] +
                     timedelta(days=1)).strftime('%Y-%m-%d')
        execution_date_u = kwargs['execution_date'].strftime('%Y%m%d')
        logging.info(kwargs)
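        # A sketch of how such a transfer commonly continues; the 'daily_data' table
        # name and the pandas round trip below are assumptions, not from the source.
        import pandas as pd
        df = pd.read_sql(
            f"SELECT * FROM daily_data WHERE day >= '{execution_date}' AND day < '{next_date}'",
            src_engine,
        )
        df.to_sql('daily_data', tgt_engine, if_exists='append', index=False)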
Example #19
File: temp12.py  Project: Chiven01/dags
default_args = {
    'owner': 'chenxianxin',
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
    'execution_timeout': timedelta(hours=12),
    'depends_on_past': True,
    'start_date': utils.dates.days_ago(1)
}

dag = DAG(
    'temp12',
    default_args = default_args,
    description = u'信息流业务统计',
    schedule_interval = "@daily"
)
dag.doc_md = __doc__

env = {'HIVE_CONF_DIR':'daohang','HADOOP_CONF_DIR':'daohang'}

lens = [140,5,5,2,2]

temps = []
for n in range(lens[0]):
    temp = 'temp_%s' % n
    temp = BashOperator(
        dag=dag,
        env=env,
        task_id=temp,
Example #20
import os
from ETL.SFTPToS3Operator import SFTPToS3Operator
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.operators.dummy_operator import DummyOperator

ACCESS_KEY = os.environ.get("AWS_ACCESS_KEY_ID")
SECRET_KEY = os.environ.get("AWS_SECRET_ACCESS_KEY")
REGION_NAME = os.environ.get("AWS_DEFAULT_REGION")
BUCKET_NAME = os.environ.get("AWS_BUCKET_NAME")

dag = DAG(dag_id="wineDataDag",
          start_date=days_ago(1),
          schedule_interval="@daily")

start_dag = DummyOperator(task_id="StartDag", dag=dag)

download_file_1 = SFTPToS3Operator(task_id="download_file_1",
                                   aws_access_key_id=ACCESS_KEY,
                                   aws_secret_access_key=SECRET_KEY,
                                   region_name=REGION_NAME,
                                   bucket_name=BUCKET_NAME,
                                   pkey="*private key file path*",
                                   ip="3.9.23.108",
                                   username="******",
                                   file_to_download="winemag-data-130k-v2.csv",
                                   save_as="wine_csv_1.csv",
                                   dag=dag)

download_file_2 = SFTPToS3Operator(
Example #21
    return None


def source_to_use(**kwargs):
    ti = kwargs['ti']
    source = ti.xcom_pull(task_ids='hook_task')
    print("source fetch from XCOM: {}".format(source))
    return source


def check_for_activated_source(**kwargs):
    ti = kwargs['ti']
    return ti.xcom_pull(task_ids='xcom_task').lower()


with DAG('branch_dag', default_args=default_args,
         schedule_interval='@once') as dag:
    start_task = DummyOperator(task_id='start_task')
    hook_task = PythonOperator(task_id='hook_task',
                               python_callable=get_activated_sources)
    xcom_task = PythonOperator(task_id='xcom_task',
                               python_callable=source_to_use,
                               provide_context=True)
    branch_task = BranchPythonOperator(
        task_id='branch_task',
        python_callable=check_for_activated_source,
        provide_context=True)
    mysql_task = BashOperator(task_id='mysql',
                              bash_command='echo "MYSQL is activated"')
    postgresql_task = BashOperator(
        task_id='postgresql', bash_command='echo "PostgreSQL is activated"')
    s3_task = BashOperator(task_id='s3', bash_command='echo "S3 is activated"')
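
    # Likely dependency wiring for this branch DAG (an assumption based on the task
    # ids above): the branch picks one of the three "activated source" tasks.
    start_task >> hook_task >> xcom_task >> branch_task
    branch_task >> [mysql_task, postgresql_task, s3_task]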
Example #22
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

from datetime import timedelta

from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from airflow.utils.dates import days_ago

with DAG(
        dag_id="test_with_non_default_owner",
        schedule_interval="0 0 * * *",
        start_date=days_ago(2),
        dagrun_timeout=timedelta(minutes=60),
        tags=["example"],
) as dag:
    run_this_last = DummyOperator(
        task_id="test_task",
        owner="John",
    )
Example #23
# The adi_dimensional_by_date bq table is loaded in mango_log_processing.py

DEFAULT_ARGS = {
    'owner': '*****@*****.**',
    'depends_on_past': False,
    'start_date': datetime(2018, 1, 1),
    'email': ['*****@*****.**', '*****@*****.**'],
    'email_on_failure': True,
    'email_on_retry': True,
    'retries': 2,
    'retry_delay': timedelta(minutes=15),
}

blp_dag = DAG(
    'adi_by_region',
    default_args=DEFAULT_ARGS,
    dagrun_timeout=timedelta(hours=1),
    # Run on the first of every month, after 1AM PDT. Cron is in UTC
    schedule_interval='0 9 1 * *')

gcp_conn_id = "google_cloud_derived_datasets"
connection = GoogleCloudBaseHook(gcp_conn_id=gcp_conn_id)

aws_conn_id = 'aws_data_iam_blpadi'

location = 'us-central1-a'
cluster_name = 'bq-load-gke-1'

# Calculate run month and year. Execution date is the previous period (month)
run_month = '{{ (execution_date.replace(day=1)).strftime("%m") }}'
run_year = '{{ (execution_date.replace(day=1)).strftime("%Y") }}'
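
# For example, for an execution_date of 2020-03-15 the two templates above render
# run_month as "03" and run_year as "2020".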
Example #24
    def setUp(self):
        configuration.load_test_config()
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG('test_dag_id', default_args=args)
        self.dag = dag
        self.hql = """
Example #25
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from operators import (RenderToS3Operator)

default_args = {
    'owner': 'avivysya',
    'start_date': datetime(2019, 1, 12),
    'depends_on_past': False,
    'retries': 8,
    'retry_delay': timedelta(minutes=5),
    'email_on_retry': False
}

dag = DAG('file_upload_dag',
          default_args=default_args,
          description='Upload data files to s3',
          schedule_interval='0 * * * *',
          catchup=False)
content_list = os.listdir('./data/processed-data/')
dir_list = filter(
    lambda x: os.path.isdir(os.path.join('./data/processed-data/', x)),
    content_list)

start_upload = DummyOperator(task_id='Upload_To_S3_Start', dag=dag)

end_upload = DummyOperator(task_id='Upload_To_S3_Finalized', dag=dag)

render_to_s3 = RenderToS3Operator(
    task_id='Render_To_S3',
    dag=dag,
    local_output=False,
Example #26
from datetime import datetime
from airflow import DAG
from airflow.operators.dummy_operator import DummyOperator
from datetime import date
import json
from main.factory import pipeline_factory, app_task_factory
import os

DIR_PATH = os.path.dirname(os.path.realpath(__file__))
print(DIR_PATH)
PARENT_PATH = os.path.abspath('..')
today = date.today().strftime("%Y%m%d")
dag = DAG('humidity_uber_rides', description='Humidity Range & Uber Rides - Pipeline',
          schedule_interval='0 10 * * *',
          start_date=datetime.now(),
          catchup=False)

with open('pipeline/resources/config.json', 'r') as f:
    config = json.load(f)

start_operator = DummyOperator(task_id='start_task', dag=dag)
join_operator = DummyOperator(task_id='join_pipeline_task', dag=dag)
end_task = DummyOperator(task_id='end_task', dag=dag)

uber_data_pipeline = pipeline_factory(dag, config, today, start_operator, 'uber_data')
uber_data_pipeline.set_downstream(join_operator)
weather_data_pipeline = pipeline_factory(dag, config, today, start_operator, 'weather_data')
weather_data_pipeline.set_downstream(join_operator)

app_task = app_task_factory(config, today, dag, join_operator)
app_task.set_downstream(end_task)

Example #27
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    # 'start_date': airflow.utils.dates.days_ago(2),
    # You can also set the start date as follows
    'start_date': dt.datetime(year=2018, month=10, day=22),
    'retries': 1,
    'retry_delay': dt.timedelta(minutes=5),
}

dag = DAG(
    dag_id='airflow_tutorial_v01',
    default_args=default_args,
    schedule_interval='0 * * * *',
    # You can also specify schedule interval as follows
    # schedule_interval=dt.timedelta(hours=1)
)

print_hello = BashOperator(task_id='print_hello',
                           bash_command='echo "hello"',
                           dag=dag)

sleep = BashOperator(task_id='sleep', bash_command='sleep 5', dag=dag)

print_world = PythonOperator(task_id='print_world',
                             python_callable=print_world,
                             dag=dag)

# one way of setting dependencies
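# For example, using the bitshift syntax with the task names defined above:
print_hello >> sleep >> print_world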
Example #28
from airflow import DAG
from airflow.utils.dates import days_ago
from airflow.operators import PythonOperator
args = {
    'owner': 'airflow',
    'start_date': days_ago(2),
}
dag = DAG('example_xcom',
          schedule_interval="@once",
          default_args=args,
          tags=['example'])
value_1 = [1, 2, 3]
value_2 = {'a': 'b'}


def push(**kwargs):
    """Pushes an XCom without a specific target"""
    kwargs['ti'].xcom_push(key='value from pusher 1', value=value_1)


def push_by_returning(**kwargs):
    """Pushes an XCom without a specific target, just by returning it"""
    return value_2


def puller(**kwargs):
    """Pull all previously pushed XComs and check if the pushed values match the pulled values."""
    ti = kwargs['ti']
    pulled_value_1 = ti.xcom_pull(key='value from pusher 1', task_ids='push')
    pulled_value_2 = ti.xcom_pull(task_ids='push_by_returning')
    print(pulled_value_1, pulled_value_2)
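

# A minimal sketch of how these callables are typically wired into tasks; the task
# ids match the xcom_pull calls above, and provide_context is assumed for this
# Airflow 1.x style import:
push1 = PythonOperator(task_id='push', python_callable=push,
                       provide_context=True, dag=dag)
push2 = PythonOperator(task_id='push_by_returning', python_callable=push_by_returning,
                       provide_context=True, dag=dag)
pull = PythonOperator(task_id='puller', python_callable=puller,
                      provide_context=True, dag=dag)
pull << [push1, push2]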
Example #29
    return DingdingOperator(
        task_id='dingding_success_callback',
        dingding_conn_id='dingding_default',
        message_type='text',
        message=message,
        at_all=True,
    ).execute(context)


args['on_failure_callback'] = failure_callback
# [END howto_operator_dingding_failure_callback]

with DAG(
        dag_id='example_dingding_operator',
        default_args=args,
        schedule_interval='@once',
        dagrun_timeout=timedelta(minutes=60),
        tags=['example'],
) as dag:

    # [START howto_operator_dingding]
    text_msg_remind_none = DingdingOperator(
        task_id='text_msg_remind_none',
        dingding_conn_id='dingding_default',
        message_type='text',
        message='Airflow dingding text message remind none',
        at_mobiles=None,
        at_all=False)
    # [END howto_operator_dingding]

    text_msg_remind_specific = DingdingOperator(
    "jdoe_67890",
    'depends_on_past':
    False,
    'start_date':
    datetime.combine(datetime.today() - timedelta(1), datetime.min.time()),
    'email':
    "None",
    'email_on_failure':
    False,
    'email_on_retry':
    False,
}

dag = DAG(dag_id="jb-12348_prep",
          description="No description provided",
          catchup=True,
          max_active_runs=1,
          schedule_interval=None,
          default_args=default_args)

load_collection_0 = PythonOperator(
    task_id='load_collection_0',
    dag=dag,
    python_callable=EODataProcessor,
    op_kwargs={
        'filepaths': [
            './wekeo_data_storage/S5P_RPRO_L2__NO2____20180503T093059_20180503T111427_02866_01_010202_20190202T034117.nc',
            './wekeo_data_storage/S5P_RPRO_L2__NO2____20180502T094957_20180502T113325_02852_01_010202_20190201T215849.nc',
            './wekeo_data_storage/S5P_RPRO_L2__NO2____20180501T082724_20180501T101003_02837_01_010202_20190201T175639.nc'
        ],
        'dc_filepaths':
        None,