Example #1
def add_task(task_id, bash_command):
    return bash_operator.BashOperator(
        task_id=task_id,
        bash_command=bash_command,
        execution_timeout=timedelta(hours=15),
        env=environment,
        dag=dag)
def build_bash_operator(operator_ref, dag_ref):
    """
    Builds a DAG operator of type: BashOperator.
    Args:
        operator_ref (dict): the operator definition, containing 'task_id'
            (string) and 'command' (list of shell command strings)
        dag_ref (DAG): the DAG to associate this operator with
    """
    op = bash_operator.BashOperator(task_id=operator_ref['task_id'],
                                    bash_command=";".join(
                                        operator_ref['command']),
                                    dag=dag_ref)

    return op
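A hedged usage sketch for build_bash_operator follows; the operator_ref dict shape is inferred from the function body, and the imports, DAG name, and task values are illustrative only.

# Hypothetical usage; imports and names are illustrative, not from the source.
import datetime
from airflow import models

example_dag = models.DAG(dag_id='example_builder_dag',
                         start_date=datetime.datetime(2021, 1, 1),
                         schedule_interval=None)
cleanup_op = build_bash_operator(
    {'task_id': 'cleanup', 'command': ['echo start', 'echo cleanup done']},
    example_dag)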
Example #3
def add_export_task(toggle, task_id, bash_command, dependencies=None):
    if toggle:
        operator = bash_operator.BashOperator(
            task_id=task_id,
            bash_command=bash_command,
            execution_timeout=timedelta(hours=15),
            env=environment,
            dag=dag)
        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                if dependency is not None:
                    dependency >> operator
        return operator
    else:
        return None
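A hedged usage sketch for add_export_task; the toggle values and task names are assumptions, not from the source.

# Hypothetical usage: the task is only created when the toggle is True,
# and any non-None entries in `dependencies` are set upstream via >>.
extract = add_export_task(True, 'extract_data', 'echo extract')
load = add_export_task(True, 'load_data', 'echo load',
                       dependencies=[extract])
skipped = add_export_task(False, 'optional_step', 'echo never runs')  # returns None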
Example #4
def convert_to_airflow_op(self):
    return bash_operator.BashOperator(
        bash_command='exit 1',
        task_id=self.task_id,
        trigger_rule=self.trigger_rule,
    )
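Since convert_to_airflow_op takes self and reads task_id and trigger_rule from the instance, it belongs to some wrapper class; the class below is a minimal assumption for illustration.

# Hypothetical wrapper class; the class name and constructor are assumptions.
class AlwaysFailTask:
    def __init__(self, task_id, trigger_rule='all_success'):
        self.task_id = task_id
        self.trigger_rule = trigger_rule

    def convert_to_airflow_op(self):
        # Same body as above: a BashOperator that always exits non-zero.
        return bash_operator.BashOperator(
            bash_command='exit 1',
            task_id=self.task_id,
            trigger_rule=self.trigger_rule,
        )

fail_op = AlwaysFailTask('always_fail').convert_to_airflow_op()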
# See the License for the specific language governing permissions and
# limitations under the License.

# [START composer_quickstart]
import datetime

import airflow
from airflow.operators import bash_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG('composer_sample_dag',
                 catchup=False,
                 default_args=default_args,
                 schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf', bash_command='echo {{ dag_run.id }}')
# [END composer_quickstart]
Example #6
from airflow import models
from airflow.operators import bash_operator
from airflow.operators.gcs_to_bq import GCSToBigQueryOperator
from airflow.utils.dates import days_ago

args = {
    'owner': 'airflow',
    'start_date': days_ago(2)
}

dag = models.DAG(
    dag_id='example_gcs_to_bq_operator', default_args=args,
    schedule_interval=None, tags=['example'])

create_test_dataset = bash_operator.BashOperator(
    task_id='create_airflow_test_dataset',
    bash_command='bq mk airflow_test',
    dag=dag)

# [START howto_operator_gcs_to_bq]
load_csv = GCSToBigQueryOperator(
    task_id='gcs_to_bq_example',
    bucket='cloud-samples-data',
    source_objects=['bigquery/us-states/us-states.csv'],
    destination_project_dataset_table='airflow_test.gcs_to_bq_table',
    schema_fields=[
        {'name': 'name', 'type': 'STRING', 'mode': 'NULLABLE'},
        {'name': 'post_abbr', 'type': 'STRING', 'mode': 'NULLABLE'},
    ],
    write_disposition='WRITE_TRUNCATE',
    dag=dag)
# [END howto_operator_gcs_to_bq]
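The excerpt defines the dataset-creation and load tasks but does not show their ordering; if the dataset must exist before the load, a hedged wiring would be:

# Assumed ordering (not shown in the excerpt): create the dataset first.
create_test_dataset >> load_csv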
Example #7
    'start_date': yesterday,
    'email': email,
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': gcp_project
}

with models.DAG('product_table',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    bq_make_raw_dataset = bash_operator.BashOperator(
        task_id='make_bq_raw_dataset',
        bash_command=
        'bq --location=asia-southeast1 ls {} || bq --location=asia-southeast1 mk {}'
        .format(bq_raw_dataset_name, bq_raw_dataset_name))

    raw_sql_files = read_sql_from_gcs(bq_raw_dataset_name, gcs_bucket)

    bq_start_making_raw_tables = dummy_operator.DummyOperator(
        task_id='start_making_raw_tables')

    bq_end_making_raw_tables = dummy_operator.DummyOperator(
        task_id='end_making_raw_tables')

    for filename in raw_sql_files:
        sql_statement = raw_sql_files[filename].decode()
        table_name = filename.replace('.sql', '')
        table_name = table_name.replace('raw/', '')
Example #8
import datetime

from airflow import models
from airflow.operators import bash_operator
from airflow.operators import python_operator

yesterday = datetime.datetime.combine(
    datetime.datetime.today() - datetime.timedelta(1),
    datetime.datetime.min.time())

default_dag_args = {'start_date': yesterday}

with models.DAG('running_python_and_bash_operator',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def hello_world():
        print('Hello World!')
        return 1

    def greeting():
        print('Greetings from GCP! Happy shopping.')
        return 'Greeting successfully printed.'

    hello_world_greeting = python_operator.PythonOperator(
        task_id='python_1', python_callable=hello_world)

    sales_greeting = python_operator.PythonOperator(task_id='python_2',
                                                    python_callable=greeting)

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash', bash_command='echo Goodbye! Hope to see you soon.')

    hello_world_greeting >> sales_greeting >> bash_greeting
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'composer_sample_simple_greeting',
        schedule_interval=datetime.timedelta(days=1),
        default_args=default_dag_args) as dag:
    # [END composer_simple_define_dag_airflow_1]
    # [START composer_simple_operators_airflow_1]
    def greeting():
        import logging
        logging.info('Hello World!')

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(
        task_id='hello',
        python_callable=greeting)

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye',
        bash_command='echo Goodbye.')
    # [END composer_simple_operators_airflow_1]

    # [START composer_simple_relationships_airflow_1]
    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    hello_python >> goodbye_bash
    # [END composer_simple_relationships_airflow_1]
# [END composer_simple_airflow_1]
Example #10
default_dag_args = {
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 0,
    #    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime.today() - datetime.timedelta(days=1)
}

with models.DAG('lastfm-1k-ingest',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    dataflow = dataflow_operator.DataFlowPythonOperator(
        task_id='ingest-users-dataflow',
        py_file='gs://{}/lastfm-dataset-1K/code/ingest-users.py'.format(
            PROJECT),
        job_name='ingest-users-dataflow',
        py_options=[],
        dataflow_default_options={
            'project': PROJECT,
            'region': 'europe-west1'
        },
        options={},
        poll_sleep=30)

    start = bash_operator.BashOperator(task_id='start',
                                       bash_command='echo "Start"')
    end = bash_operator.BashOperator(task_id='end', bash_command='echo "End"')

    start >> dataflow >> end
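PROJECT is not defined in this excerpt; a hypothetical definition could be a plain project id, for example:

# Hypothetical definition of PROJECT used above (placeholder value only).
PROJECT = 'my-gcp-project-id'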
Example #11
import datetime

import airflow
from airflow.operators import bash_operator

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime(2017, 1, 1),
}

with airflow.DAG(
        'sample_dag',
        default_args=default_args,
        # Not scheduled, trigger only
        schedule_interval=None) as dag:

    # Print the dag_run's configuration, which includes information about the
    # Cloud Storage object change.
    print_gcs_info = bash_operator.BashOperator(
        task_id='print_gcs_info',
        bash_command='echo Running: {{ dag_run.conf }}')
import datetime

import airflow
from airflow.operators import bash_operator
from airflow.operators import python_operator
from airflow.contrib.operators import kubernetes_pod_operator

YESTERDAY = datetime.datetime.now(
    tz=datetime.timezone.utc) - datetime.timedelta(days=1)

default_args = {'start_date': YESTERDAY}

dag = airflow.DAG('simple_workflow_dag',
                  default_args=default_args,
                  schedule_interval=None)

bash_operator_task = bash_operator.BashOperator(
    task_id='bash_operator_example_task',
    bash_command='echo "Hello from Airflow Bash Operator"',
    dag=dag)


def python_operator_func():
    print("Hello from Airflow Python Operator")


python_operator_task = python_operator.PythonOperator(
    task_id='python_operator_example_task',
    python_callable=python_operator_func,
    dag=dag)

kubernetes_pod_operator_task = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='k8s_pod_operator_example_task',
    name='k8s_pod_example',
Example #13
# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'composer_sample_gcloud_ssh_2',
        schedule_interval=datetime.timedelta(days=1),
        default_args=default_dag_args) as dag:
    def greeting():
        import logging
        logging.info('Hello World!')

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(
        task_id='hello',
        python_callable=greeting)

    gcloud_ssh = bash_operator.BashOperator(
        task_id='gcloud_ssh',
        bash_command=gcloud_command
    )

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye',
        bash_command='echo Goodbye.')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    hello_python >> gcloud_ssh >> goodbye_bash
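gcloud_command is not defined in this excerpt; a hypothetical value could be a one-shot SSH command, for example:

# Hypothetical value for gcloud_command; instance name and zone are placeholders.
gcloud_command = (
    'gcloud compute ssh example-instance --zone us-central1-a '
    '--command "echo Hello from the VM"')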
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': datetime.datetime(2017, 1, 1),
}

with airflow.DAG('composer_trigger_gcs_to_bq_dag',
                 default_args=default_args,
                 # Not scheduled, trigger only
                 schedule_interval=None) as dag:

    # Print the dag_run's configuration, which includes information about the
    # Cloud Storage object change.

    print_gcs_info = bash_operator.BashOperator(
        task_id='print_gcs_info', bash_command='echo {{ dag_run.conf }}')

    # [Create dataset in BQ]

    create_test_dataset = \
        bash_operator.BashOperator(task_id='create_test_dataset',
                                   bash_command='bq mk airflow_test1')

    # [Upload Csv from GCS to BQ using Load ]

    Upload_csv = bash_operator.BashOperator(
        task_id='Upload_csv',
        bash_command=
        'bq load --autodetect --source_format=CSV airflow_test1.simple gs://prp-source/simple.csv'
    )
default_dag_args = {
    'start_date': yesterday,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=2)
}

with models.DAG('python_and_bash_with_all_success_trigger',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def hello_world():
        raise ValueError('Oops! something went wrong.')
        print('Hello World!')
        return 1

    def greeting():
        print('Greetings from SpikeySales! Happy shopping.')
        return 'Greeting successfully printed.'

    hello_world_greeting = python_operator.PythonOperator(
        task_id='python_1', python_callable=hello_world)

    spikeysales_greeting = python_operator.PythonOperator(
        task_id='python_2', python_callable=greeting)

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash',
        bash_command='echo Goodbye! Hope to see you soon.',
        trigger_rule=trigger_rule.TriggerRule.ALL_SUCCESS)

    hello_world_greeting >> spikeysales_greeting >> bash_greeting
Example #16
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': TOMORROW,
}

dag = DAG(
    'Airflow_Bigquery',
    default_args=default_args,
    description=('Load and transform data from Google Cloud Storage '
                 'to Google BigQuery with Airflow'),
)

start_operator = dummy_operator.DummyOperator(task_id='Begin_execution',
                                              dag=dag)

create_dataset = bash_operator.BashOperator(
    task_id='create_airflow_iot_dataset', bash_command='bq mk iot', dag=dag)

load_csv = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
    task_id='gcs_to_bq',
    bucket='bucket1_hazem',
    source_objects=['heartRate-final.csv'],
    destination_project_dataset_table='iot.heartRateTable',
    trigger_rule='all_done',
    skip_leading_rows=1,
    schema_fields=[
        {
            'name': 'sensorID',
            'type': 'STRING',
            'mode': 'NULLABLE'
        },
        {
Example #17
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""An example DAG demonstrating use of variables and how to test it."""

import datetime

from airflow import models
from airflow.operators import bash_operator
from airflow.operators import dummy_operator

yesterday = datetime.datetime.now() - datetime.timedelta(days=1)

default_dag_args = {
    'start_date': yesterday,
}

with models.DAG('composer_sample_cycle',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:
    start = dummy_operator.DummyOperator(task_id='start')
    end = dummy_operator.DummyOperator(task_id='end')
    variable_example = bash_operator.BashOperator(
        task_id='variable_example',
        bash_command='echo project_id=' + models.Variable.get('gcp_project'))
Example #18
    remove_cluster = dataproc_operator.DataprocClusterDeleteOperator(
        project_id=PROJECT,
        task_id="delete_cluster",
        cluster_name='vf-polimi-demo',
        region='europe-west1')

    def check_batch_kpi_scheduled_cluster_running(**kwargs):
        ti = kwargs['ti']
        xcom_value = ti.xcom_pull(task_ids='batch_kpi_scheduled_cluster')
        if xcom_value == "vf-polimi-demo":
            return 'delete_cluster'
        else:
            return 'end'

    branch_batch_kpi_scheduled_active_cluster = BranchPythonOperator(
        task_id='check_batch_kpi_scheduled_cluster',
        provide_context=True,
        python_callable=check_batch_kpi_scheduled_cluster_running)

    batch_kpi_scheduled_cluster_running = bash_operator.BashOperator(
        task_id='batch_kpi_scheduled_cluster',
        bash_command=
        "gcloud dataproc clusters list --region europe-west1 | grep 'vf-polimi-demo'| awk '{print $1; exit}'",
        xcom_push=True,
        trigger_rule="all_done")

    end_pipeline = dummy_operator.DummyOperator(task_id='end')

    create_dataproc_cluster >> run_batch_kpi_scheduled >> batch_kpi_scheduled_cluster_running >> branch_batch_kpi_scheduled_active_cluster >> [
        remove_cluster, end_pipeline
    ]
        'bindexis_end2end',
        schedule_interval=datetime.timedelta(days=1), # or in cron Format
        default_args=default_dag_args) as dag:

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    bindexis_python = python_operator.PythonOperator(
        task_id='bindexis-dataload-start',
        python_callable=def_bindexis_dataload.bindexis_dataload,
        op_kwargs={'user_bindexis': Variable.get("user_bindexis"),
                    'pw_bindexis': Variable.get("password_bindexis")},
        retries=2)

    # Likewise, the goodbye_bash task calls a Bash script.
    end_bash = bash_operator.BashOperator(
        task_id='bindexis-end',
        bash_command='echo bindexis-dataload-end.')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, bindexis_python executes before end_bash.
    bindexis_python >> end_bash


# Send email confirmation
#email_summary = EmailOperator(
#    task_id='email_summary',
#    to=models.Variable.get('email'),
#    subject='ERROR: Bindexis Dataload and Trigger',
#    html_content="""
#    Bindexis Dataload fails.
#    Error: {ERROR_FROM_LOG}.
import datetime
from airflow import models
from airflow.operators import bash_operator

with models.DAG(
        'composer_hello_world',
        schedule_interval=datetime.timedelta(days=1),
        default_args={'start_date': datetime.datetime(2020, 9, 1)}) as dag:

    goodbye_bash = bash_operator.BashOperator(
        task_id='hello_world',
        bash_command='ls $DAGS_FOLDER')

import datetime

from airflow import models
from airflow.operators import bash_operator

default_dag_args = {
    'start_date': datetime.datetime(2018, 12, 17, 0, 0),
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=2),
    'project_id': models.Variable.get('gcp_project')
}

source_bucket = models.Variable.get('gcs_source_bucket')
dest_bucket = models.Variable.get('gcs_dest_bucket')

with models.DAG('transferring_data_from_gcs_to_gcs',
                schedule_interval=None,
                default_args=default_dag_args) as dag:

    transfer_data_gcs_to_gcs = bash_operator.BashOperator(
        task_id='data_transfer_gcs_to_gcs',
        bash_command='gsutil cp -r {source} {dest}'.format(
            source=source_bucket, dest=dest_bucket))

    transfer_data_gcs_to_gcs
Example #22
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': models.Variable.get('gcp_project')
}

with models.DAG(
        'composer_sample_bq_notify',
        schedule_interval=datetime.timedelta(weeks=4),
        default_args=default_dag_args) as dag:
    # [END composer_notify_failure]

    # [START composer_bash_bq]
    # Create BigQuery output dataset.
    make_bq_dataset = bash_operator.BashOperator(
        task_id='make_bq_dataset',
        # Executing 'bq' command requires Google Cloud SDK which comes
        # preinstalled in Cloud Composer.
        bash_command='bq ls {} || bq mk {}'.format(
            bq_dataset_name, bq_dataset_name))
    # [END composer_bash_bq]

    # [START composer_bigquery]
    # Query recent StackOverflow questions.
    bq_recent_questions_query = bigquery_operator.BigQueryOperator(
        task_id='bq_recent_questions_query',
        sql="""
        SELECT owner_display_name, title, view_count
        FROM `bigquery-public-data.stackoverflow.posts_questions`
        WHERE creation_date < CAST('{max_date}' AS TIMESTAMP)
            AND creation_date >= CAST('{min_date}' AS TIMESTAMP)
        ORDER BY view_count DESC
        LIMIT 100
    from airflow.contrib.operators import gcs_to_bq
except ImportError:
    pass

if gcs_to_bq is not None:
    args = {
        'owner': 'Datametica',
        'start_date': airflow.utils.dates.days_ago(2)
    }

    dag = models.DAG(dag_id='gcs_to_bq_operator',
                     default_args=args,
                     schedule_interval=None)

    create_test_dataset = bash_operator.BashOperator(
        task_id='create_airflow_test_dataset_1',
        bash_command='bq mk airflow_test_1',
        dag=dag)

    # [START howto_operator_gcs_to_bq]
    load_csv = gcs_to_bq.GoogleCloudStorageToBigQueryOperator(
        task_id='gcs_to_bq_example',
        bucket='dataflow_poc11',
        source_objects=['task1.csv'],
        destination_project_dataset_table=
        'gcs-bq.airflow_test.gcs_to_bq_table_1',
        schema_fields=[
            {
                'name': 'name',
                'type': 'STRING',
                'mode': 'NULLABLE'
            },
Example #24
        if (x <= 2):
            return 'hello_spikey'

        else:
            return 'dummy'

    run_this_first = dummy_operator.DummyOperator(task_id='run_this_first')

    # BranchPythonOperator takes in a callable which returns the task id of the next task.
    branching = python_operator.BranchPythonOperator(
        task_id='branching', python_callable=makeBranchChoice)

    run_this_first >> branching

    spikeysales_greeting = python_operator.PythonOperator(
        task_id='hello_spikey', python_callable=greeting)

    dummy_followed_python = dummy_operator.DummyOperator(
        task_id='follow_python')

    dummy = dummy_operator.DummyOperator(task_id='dummy')

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash',
        bash_command='echo Goodbye! Hope to see you soon.',
        trigger_rule='one_success')

    branching >> spikeysales_greeting >> dummy_followed_python >> bash_greeting
    branching >> dummy >> bash_greeting
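The branching callable above is truncated at the top; a minimal self-contained sketch of the same pattern (the random choice is illustrative, not the original makeBranchChoice) looks like this:

# Illustrative sketch of the truncated callable: BranchPythonOperator expects
# the callable to return the task_id of the branch to follow.
import random

def makeBranchChoice():
    x = random.randint(1, 5)
    if x <= 2:
        return 'hello_spikey'
    return 'dummy'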
import datetime

import airflow
from airflow.operators import bash_operator
from airflow.operators import python_operator

YESTERDAY = datetime.datetime.now() - datetime.timedelta(days=1)

default_args = {
    'owner': 'Composer Example',
    'depends_on_past': False,
    'email': [''],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': YESTERDAY,
}

with airflow.DAG('composer_sample_dag',
                 catchup=False,
                 default_args=default_args,
                 schedule_interval=None) as dag:
    #schedule_interval=datetime.timedelta(days=1)) as dag:

    # Print the dag_run id from the Airflow logs
    print_dag_run_conf = bash_operator.BashOperator(
        task_id='print_dag_run_conf',
        bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"')
    def greeting(ds, **kwargs):
        import logging
        conf, ti = kwargs["dag_run"].conf or {}, kwargs["ti"]
        logging.info(
            f'Hello! conf: {conf.get("key")}, xcom: {ti.xcom_pull(key="xcomkey")}'
        )

    set_xcoms = python_operator.PythonOperator(task_id="set_xcoms",
                                               provide_context=True,
                                               python_callable=set_xcom_fn)

    greet_python = python_operator.PythonOperator(task_id='greeting',
                                                  provide_context=True,
                                                  python_callable=greeting)

    # bash_command='echo dagrun: {{ dag_run.conf }} / airflow_val: {{ var.value.project_id }}'
    yo_dagrun = bash_operator.BashOperator(
        task_id='yo',
        bash_command=
        'echo dagrun: {{ dag_run.conf["key"] }} / airflow_val: {{ var.value.project_id }}'
    )

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye', bash_command='echo {{ ti.xcom_pull(key="xcomkey") }}')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    set_xcoms >> greet_python >> yo_dagrun >> goodbye_bash
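set_xcom_fn is not shown in this excerpt; a hypothetical implementation that pushes the value pulled by the tasks above could be:

# Hypothetical callable for the set_xcoms task: with provide_context=True the
# task instance is available as kwargs['ti'], so push a value under 'xcomkey'.
def set_xcom_fn(**kwargs):
    kwargs['ti'].xcom_push(key='xcomkey', value='xcomval')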
Example #27
    datetime.datetime.min.time())

# [START composer_notify_failure]
default_dag_args = {
    'start_date': yesterday,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'project_id': models.Variable.get('gcp_project')
}

with models.DAG(
        'bqml_demo_composer',
        schedule_interval=datetime.timedelta(weeks=4),
        default_args=default_dag_args) as dag:
    # [END composer_notify_failure]

    
    # Create BQML Model
    create_bqml_model = bash_operator.BashOperator(
        task_id='create_bqml_model',
        bash_command='bq query "$(gsutil cat gs://anand-bq-test-2-2/bqdemo/query2.txt)"',
        trigger_rule=trigger_rule.TriggerRule.ALL_DONE)

    # Define DAG dependencies.
    create_bqml_model
Example #28
import airflow
import datetime
from airflow import DAG
from airflow.operators import bash_operator, dummy_operator

default_args = {
    'owner': 'Nitin Ware',
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': airflow.utils.dates.days_ago(1),
}

dag = DAG(
    'bash_dag',
    catchup=False,
    default_args=default_args,
    schedule_interval="@once",
)

start_dag = dummy_operator.DummyOperator(
    task_id='start',
    dag=dag,
)

bash_dag = bash_operator.BashOperator(task_id='bash_command',
                                      bash_command='echo Hello Bash.',
                                      dag=dag)

start_dag >> bash_dag
"""
Simple DAG for using Airflow
"""
import datetime
import logging
from airflow import models
from airflow.operators import bash_operator
from airflow.operators import python_operator

DEFAULT_DAG_ARGS = {'start_date': datetime.datetime(2018, 1, 1)}

with models.DAG('composer_sample_greeting',
                schedule_interval=datetime.timedelta(days=1),
                default_args=DEFAULT_DAG_ARGS) as dag:

    def _hello_python():
        """
        A method here
        """
        logging.info('Hello World!')

    HELLO_PYTHON = python_operator.PythonOperator(
        task_id='HELLO_PYTHON', python_callable=_hello_python)

    GOODBYE_BASH = bash_operator.BashOperator(task_id='GOODBYE_BASH',
                                              bash_command='echo Goodbye')

    HELLO_PYTHON >> GOODBYE_BASH
Example #30
    '{{ macros.ds_format(macros.ds_add(ds, 0), "%Y-%m-%d", "%Y%m%d") }}Z-' +
    user_id + '-'
]
bd_dates = [
    '{{ macros.ds_format(macros.ds_add(ds, -1), "%Y-%m-%d", "%Y%m%d") }}',
    '{{ macros.ds_format(macros.ds_add(ds, 0), "%Y-%m-%d", "%Y%m%d") }}'
]

# Process two days of data in UTC so it can be presented in Korean local time.
for i in range(2):
    output_directory = '{}/data/log/rescuetime'.format(datalake_gs)
    # Fetch two days of data at once so RescueTime data in Korean time (+09:00) can be stored in UTC.
    load_rescuetime = bash_operator.BashOperator(
        task_id=('load_rescuetime-%s' % i),
        bash_command=
        'java -jar ${{AIRFLOW_HOME}}/dags/dd-importers-load-rescuetime.jar -user_id={} -api_key={} -input_begin_date={} -input_end_date={} -input_timezone=Asia/Seoul -output_date={} -output_timezone=UTC -output_directory={}  -output_filenameprefix={} -shard_size=3'
        .format(user_id, api_key, input_begin_dates[i], input_end_dates[i],
                input_begin_dates[i], output_directory,
                output_filename_prefixes[i]),
        dag=dag)

    create_rescuetime_bd = dataflow_operator.DataflowTemplateOperator(
        task_id=('create_rescuetime_bd-%s' % i),
        template='{}/templates/dd-etls-create-rescuetime'.format(dataflow_gs),
        parameters={
            'runner':
            'DataflowRunner',
            'inputFilePattern':
            '{}/data/log/rescuetime/{}Z-*'.format(datalake_gs, bd_dates[i]),
            'outputTable':
            '{}:dw_datadriver.rescuetime_tbl_bd_data${}'.format(
                project_id, bd_dates[i])