Example no. 1
    def __init__(self, component_name, task_id, parent_dag, input_dict,
                 output_dict, exec_properties, driver_options, driver_class,
                 executor_class, additional_pipeline_args,
                 metadata_connection_config, logger_config):
        super(_TfxWorker, self).__init__(
            dag_id=task_id,
            schedule_interval=None,
            start_date=parent_dag.start_date,
            user_defined_filters={'b64encode': base64.b64encode})
        adaptor = airflow_adapter.AirflowAdapter(
            component_name=component_name,
            input_dict=input_dict,
            output_dict=output_dict,
            exec_properties=exec_properties,
            driver_options=driver_options,
            driver_class=driver_class,
            executor_class=executor_class,
            additional_pipeline_args=additional_pipeline_args,
            metadata_connection_config=metadata_connection_config,
            logger_config=logger_config)
        # Before the executor runs, check if the artifact already exists
        checkcache_op = python_operator.BranchPythonOperator(
            task_id=task_id + '.checkcache',
            provide_context=True,
            python_callable=adaptor.check_cache_and_maybe_prepare_execution,
            op_kwargs={
                'uncached_branch': task_id + '.exec',
                'cached_branch': task_id + '.noop_sink',
            },
            dag=self)
        tfx_op = python_operator.PythonOperator(
            task_id=task_id + '.exec',
            provide_context=True,
            python_callable=adaptor.python_exec,
            op_kwargs={
                'cache_task_name': task_id + '.checkcache',
            },
            dag=self)
        noop_sink_op = dummy_operator.DummyOperator(
            task_id=task_id + '.noop_sink', dag=self)
        publishexec_op = python_operator.PythonOperator(
            task_id=task_id + '.publishexec',
            provide_context=True,
            python_callable=adaptor.publish_exec,
            op_kwargs={
                'cache_task_name': task_id + '.checkcache',
                'exec_task_name': task_id + '.exec',
            },
            dag=self)

        tfx_op.set_upstream(checkcache_op)
        publishexec_op.set_upstream(tfx_op)
        noop_sink_op.set_upstream(checkcache_op)
Example no. 2
    def add_export_task(toggle, task_id, python_callable, dependencies=None):
        if toggle:

            def python_callable_with_fallback(**kwargs):
                for index, provider_uri in enumerate(provider_uris):
                    kwargs['provider_uri'] = provider_uri
                    try:
                        python_callable(**kwargs)
                        break
                    except Exception as e:
                        if index < (len(provider_uris) - 1):
                            logging.exception(
                                'An exception occurred. Trying another uri')
                        else:
                            raise e

            operator = python_operator.PythonOperator(
                task_id=task_id,
                python_callable=python_callable_with_fallback,
                provide_context=True,
                execution_timeout=timedelta(hours=15),
                dag=dag,
            )
            if dependencies is not None and len(dependencies) > 0:
                for dependency in dependencies:
                    if dependency is not None:
                        dependency >> operator
            return operator
        else:
            return None
Example no. 3
def create_python_operator(dag, workflow, job):
    from airflow.operators import python_operator

    return python_operator.PythonOperator(
        dag=dag,
        task_id=job.id,
        python_callable=callable_factory(job, workflow.dt_as_datetime),
        retries=job.retry_count,
        retry_delay=timedelta(seconds=job.retry_pause_sec),
        provide_context=True)
Example no. 4
 def call(self, dag):
     t_up = self.f_task(dag)
     t = python_operator.PythonOperator(
         task_id=self.id,
         python_callable=self.run,
         provide_context=True,
         templates_dict={"result": fairflow.utils.xcom_result(t_up)},
         dag=dag)
     t.set_upstream(t_up)
     return t
Example no. 5
 def call(self, dag):
     tasks = [fop(dag) for fop in self.fops]
     t = python_operator.PythonOperator(
         task_id=self.id,
         python_callable=self.run,
         provide_context=True,
         templates_dict={
             ut.task_id : utils.xcom_result(ut)
             for ut in tasks
         },
         dag=dag
     )
     t.set_upstream(tasks)
     return t
Example no. 6
 def call(self, dag):
     # Instantiate all upstream tasks.
     tasks_upstream = [fop(dag) for fop in self.fops_upstream]
     t_sum = python_operator.PythonOperator(
         task_id=self.id,
         python_callable=self.run,
         provide_context=True,
         templates_dict={
             ut.task_id: fairflow.utils.xcom_result(ut)
             for ut in tasks_upstream
         },
         dag=dag)
     t_sum.set_upstream(tasks_upstream)
     return t_sum
Example no. 7
def load_subdag(parent_dag_name, child_dag_name, args):
    dag_subdag = DAG(
        dag_id='{0}.{1}'.format(parent_dag_name, child_dag_name),
        default_args=args,
        schedule_interval="@daily",
    )
    with dag_subdag:
        for i in range(3):
            t = python_operator.PythonOperator(
                task_id='load_subdag_{0}'.format(i),
                python_callable=sleepFortask,
                op_kwargs={'key1': i},
                dag=dag_subdag)

    return dag_subdag
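
The factory above only builds the child DAG; a minimal sketch of how such a factory is typically attached to a parent DAG via a SubDagOperator follows. The parent DAG id, schedule, and default args below are illustrative assumptions, not taken from the original snippet.

import datetime

from airflow import DAG
from airflow.operators.subdag_operator import SubDagOperator

# Hypothetical parent DAG; in Airflow 1.x the child dag_id must be
# '<parent_dag_id>.<task_id>', which is exactly what load_subdag builds
# from its two name arguments.
args = {'start_date': datetime.datetime(2021, 1, 1)}

with DAG('example_parent_dag', default_args=args,
         schedule_interval='@daily') as parent_dag:
    load_tasks = SubDagOperator(
        task_id='load_tasks',
        subdag=load_subdag('example_parent_dag', 'load_tasks', args),
    )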
Example no. 8
def _add_update_airflow_variable_task(
        dag: models.DAG) -> python_operator.PythonOperator:
    """Adds a airflow variable with the new dataset id.

  Args:
    dag: The dag that the task needs to be added to.

  Returns:
    PythonOperator used to update airflow variable within a DAG.
  """
    return python_operator.PythonOperator(
        task_id=_UPDATE_AIRFLOW_VARS_TASK,
        python_callable=_set_model_var,
        provide_context=True,
        dag=dag,
    )
Example no. 9
 def add_export_task(toggle, task_id, python_callable, dependencies=None):
     if toggle:
         operator = python_operator.PythonOperator(
             task_id=task_id,
             python_callable=python_callable,
             provide_context=True,
             execution_timeout=timedelta(hours=15),
             dag=dag,
         )
         if dependencies is not None and len(dependencies) > 0:
             for dependency in dependencies:
                 if dependency is not None:
                     dependency >> operator
         return operator
     else:
         return None
Example no. 10
def build_python_operator(operator_ref, dag_ref):
    """
    Builds a DAG operator of type: PythonOperator.
    Args:
        operator_ref (dict): the definition of the operator (task_id, function_name, function_def)
        dag_ref (DAG): the DAG to associate this operator with
    """
    dynamic_func = {}
    exec("\n".join(operator_ref['function_def']), dynamic_func)

    op = python_operator.PythonOperator(
        task_id=operator_ref['task_id'],
        python_callable=dynamic_func[operator_ref['function_name']],
        dag=dag_ref)

    return op
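
A usage sketch for the builder above, assuming a hypothetical operator_ref dict whose 'function_def' entry carries the source lines of the callable; all names here are illustrative, not from the original.

# Illustrative input only; the real operator_ref presumably comes from a
# workflow definition parsed elsewhere.
operator_ref = {
    'task_id': 'say_hello',
    'function_name': 'say_hello',
    'function_def': [
        'def say_hello(**kwargs):',
        '    print("hello from a dynamically defined callable")',
    ],
}

# say_hello_task = build_python_operator(operator_ref, dag)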
Example no. 11
 def call(self, dag):
     """Instantiate upstream tasks, this task and set dependencies. Returns: task"""
     # Instantiate tasks for running the different models by calling their
     # FOperators on the current `dag`. Note that we do not know about the
     # models' upstream dependencies!
     model_tasks = [f(dag) for f in self.fops_models]
     t = python_operator.PythonOperator(
         task_id=self.__class__.__name__,
         python_callable=self.compare,
         provide_context=True,
         templates_dict={
             "model_taskids": [mt.task_id for mt in model_tasks]
         },
         dag=dag)
     t.set_upstream(model_tasks)
     return t
Example no. 12
    def add_save_checkpoint_tasks(dependencies=None):
        def save_checkpoint(execution_date, **kwargs):
            with TemporaryDirectory() as tempdir:
                local_path = os.path.join(tempdir, "checkpoint.txt")
                remote_path = "checkpoint/block_date={block_date}/load_complete_checkpoint.txt".format(
                    block_date=execution_date.strftime("%Y-%m-%d"))
                open(local_path, mode='a').close()
                upload_to_gcs(gcs_hook=GoogleCloudStorageHook(
                    google_cloud_storage_conn_id="google_cloud_default"),
                              bucket=output_bucket,
                              object=remote_path,
                              filename=local_path)

        save_checkpoint_task = python_operator.PythonOperator(
            task_id='save_checkpoint',
            python_callable=save_checkpoint,
            provide_context=True,
            execution_timeout=timedelta(hours=1),
            dag=dag,
        )
        if dependencies is not None and len(dependencies) > 0:
            for dependency in dependencies:
                dependency >> save_checkpoint_task
        return save_checkpoint_task
Example no. 13
    }

    # Main Dataflow task that will process and load the input delimited file.
    # TODO: Specify the type of operator we need to call to invoke DataFlow
    dataflow_task = dataflow_operator.DataFlowPythonOperator(
        task_id="process-delimited-and-push",
        py_file=DATAFLOW_FILE,
        options=job_args)

    # Here we create two conditional tasks, one of which will be executed
    # based on whether the dataflow_task was a success or a failure.
    success_move_task = python_operator.PythonOperator(
        task_id='success-move-to-completion',
        python_callable=move_to_completion_bucket,
        # A success_tag is used to move
        # the input file to a success
        # prefixed folder.
        op_args=[COMPLETION_BUCKET, SUCCESS_TAG],
        provide_context=True,
        trigger_rule=TriggerRule.ALL_SUCCESS)

    failure_move_task = python_operator.PythonOperator(
        task_id='failure-move-to-completion',
        python_callable=move_to_completion_bucket,
        # A failure_tag is used to move
        # the input file to a failure
        # prefixed folder.
        op_args=[COMPLETION_BUCKET, FAILURE_TAG],
        provide_context=True,
        trigger_rule=TriggerRule.ALL_FAILED)
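
    # (Sketch, not in the original snippet.) The excerpt stops before the
    # dependencies are declared; with these trigger rules the typical wiring
    # places both conditional tasks downstream of the Dataflow task:
    dataflow_task >> success_move_task
    dataflow_task >> failure_move_task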
Example no. 14
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'composer_sample_simple_greeting',
        schedule_interval=datetime.timedelta(days=1),
        default_args=default_dag_args) as dag:
    # [END composer_simple_define_dag_airflow_1]
    # [START composer_simple_operators_airflow_1]
    def greeting():
        import logging
        logging.info('Hello World!')

    # An instance of an operator is called a task. In this case, the
    # hello_python task calls the "greeting" Python function.
    hello_python = python_operator.PythonOperator(
        task_id='hello',
        python_callable=greeting)

    # Likewise, the goodbye_bash task calls a Bash script.
    goodbye_bash = bash_operator.BashOperator(
        task_id='bye',
        bash_command='echo Goodbye.')
    # [END composer_simple_operators_airflow_1]

    # [START composer_simple_relationships_airflow_1]
    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, hello_python executes before goodbye_bash.
    hello_python >> goodbye_bash
    # [END composer_simple_relationships_airflow_1]
# [END composer_simple_airflow_1]
Example no. 15
    'start_date': datetime.datetime(2021, 1, 5, 8, 0, 0),
}

dag = airflow.DAG('synth_dag',
                  catchup=False,
                  default_args=default_args,
                  schedule_interval=datetime.timedelta(minutes=15))


def call_synth_info_api():
    url = "https://synthinfo-ue.a.run.app/upload"
    r = requests.get(url)
    print(r)


synthInfoCall = python_operator.PythonOperator(
    task_id="call_the_api", python_callable=call_synth_info_api, dag=dag)

synthGCStoBQSync = GoogleCloudStorageToBigQueryOperator(
    task_id='gcs_to_bq',
    bucket='synth-info',
    source_objects=['synthinfo.csv'],
    schema_fields=[{
        "mode": "NULLABLE",
        "name": "address",
        "type": "STRING"
    }, {
        "mode": "NULLABLE",
        "name": "collateralToken",
        "type": "STRING"
    }, {
        "mode": "NULLABLE",
Example no. 16
        'PRZ_{{dag_run.conf["name"][:dag_run.conf["name"].rfind("/")]}}.{{"_".join(dag_run.conf["name"][:dag_run.conf["name"].rfind(".")].split("_")[2:])}}',
        'fields': g_fields,
        'load_dt': '{{ dag_run.conf["bqTimestamp"]}}',
        'op_dict': g_operations_dict
    }

    # Main Dataflow task
    TSK_dataflow_file_ingestion = dataflow_operator.DataFlowPythonOperator(
        task_id="tsk-dataflow-file-ingestion",
        py_file=DATAFLOW_FILE,
        options=job_args)

    # Upon Dataflow task success the TSK_move_into_arc_bucket starts
    TSK_move_into_arc_bucket = python_operator.PythonOperator(
        task_id='TSK_move_into_arc_bucket',
        python_callable=DPLF_move_into_arc_bucket,
        op_args=[g_output_bucket],
        provide_context=True,
        trigger_rule=TriggerRule.ALL_SUCCESS)

    # Upon Dataflow task failure the TSK_move_into_inv_bucket starts
    TSK_move_into_inv_bucket = python_operator.PythonOperator(
        task_id='TSK_move_into_inv_bucket',
        python_callable=DPLF_move_into_inv_bucket,
        op_args=[g_failed_bucket],
        provide_context=True,
        trigger_rule=TriggerRule.ONE_FAILED)

    DPLF_ConsistencyCheck = python_operator.PythonOperator(
        task_id='DPLF_ConsistencyCheck',
        python_callable=DPLF_ConsistencyCheck,
        provide_context=True)
Example no. 17
from airflow.operators import bash_operator
from airflow.operators import python_operator

yesterday = datetime.datetime.combine(
    datetime.datetime.today() - datetime.timedelta(1),
    datetime.datetime.min.time())

default_dag_args = {'start_date': yesterday}

with models.DAG('running_python_and_bash_operator',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def hello_world():
        print('Hello World!')
        return 1

    def greeting():
        print('Greetings from GCP! Happy shopping.')
        return 'Greeting successfully printed.'

    hello_world_greeting = python_operator.PythonOperator(
        task_id='python_1', python_callable=hello_world)

    sales_greeting = python_operator.PythonOperator(task_id='python_2',
                                                    python_callable=greeting)

    bash_greeting = bash_operator.BashOperator(
        task_id='bye_bash', bash_command='echo Goodbye! Hope to see you soon.')

    hello_world_greeting >> sales_greeting >> bash_greeting
Example no. 18

def task_completion(status, **kwargs):
    if status == SUCCESS_TAG:
        logging.info('successfully processed minutes document %s', kwargs['dag_run'].conf['event_date'])
    else:
        logging.info('failure in processing minutes document %s', kwargs['dag_run'].conf['event_date'])


with models.DAG(dag_id='MinutesProcessing',
                description='A Dag Triggering Minutes Processing Job',
                schedule_interval=None, default_args=DEFAULT_DAG_ARGS) as dag:
    # Args required for the Dataflow job.

    downloadminutes = python_operator.PythonOperator(task_id='downloadminutes',
                                                     python_callable=download_minutes,
                                                     op_args=[GCP_BUCKET, TARGET_EVENT],
                                                     provide_context=True)

    # use template for the xcom
    job_args = {
        'input': "{{ task_instance.xcom_pull(task_ids='downloadminutes') }}",
        'output': OUTPUT_FILE_PATH
    }

    dataflow_task = dataflow_operator.DataflowTemplateOperator(
        template=DATAFLOW_MINUTES_TEMPLATE,
        task_id="processminutes",
        parameters=job_args)

    # Here we create two conditional tasks, one of which will be executed
    # based on whether the dataflow_task was a success or a failure.
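    # (Sketch, not from the original source.) The excerpt ends here; a typical
    # completion of the pattern described in the comment above adds two tasks
    # guarded by trigger rules so that exactly one of them runs. FAILURE_TAG,
    # the task names, and the TriggerRule import (airflow.utils.trigger_rule)
    # are assumptions for illustration.
    success_notify = python_operator.PythonOperator(
        task_id='success_notify',
        python_callable=task_completion,
        op_kwargs={'status': SUCCESS_TAG},
        provide_context=True,
        trigger_rule=TriggerRule.ALL_SUCCESS)

    failure_notify = python_operator.PythonOperator(
        task_id='failure_notify',
        python_callable=task_completion,
        op_kwargs={'status': FAILURE_TAG},
        provide_context=True,
        trigger_rule=TriggerRule.ALL_FAILED)

    downloadminutes >> dataflow_task
    dataflow_task >> success_notify
    dataflow_task >> failure_notify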
Example no. 19
    job_args = {
        "input":
        'gs://{{ dag_run.conf["bucket"] }}/{{ dag_run.conf["name"] }}',
        "output": models.Variable.get("bq_output_table"),
        "fields": models.Variable.get("input_field_names"),
    }

    # Main Dataflow task that will process and load the input delimited file.
    dataflow_task = dataflow_operator.DataFlowPythonOperator(
        task_id="process-data", py_file=DATAFLOW_FILE, options=job_args)

    # trigger on success
    success_move_task = python_operator.PythonOperator(
        task_id="success-move-to-completion",
        python_callable=move_to_completion_bucket,
        op_args=[COMPLETION_BUCKET, SUCCESS_TAG],
        provide_context=True,
        trigger_rule=TriggerRule.ALL_SUCCESS,
    )

    # trigger on failure
    failure_move_task = python_operator.PythonOperator(
        task_id="failure-move-to-completion",
        python_callable=move_to_completion_bucket,
        op_args=[COMPLETION_BUCKET, FAILURE_TAG],
        provide_context=True,
        trigger_rule=TriggerRule.ALL_FAILED,
    )

    # After moving the bucket send email or other type of notification about success or failure
    success_message = "Successfully processed the latest file, moved to gs://{}.".format(
Example no. 20
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': airflow.utils.dates.days_ago(1),
}

dag = DAG('python_dag', catchup=False, default_args=default_args, schedule_interval="@once")

start_dag = dummy_operator.DummyOperator(
    task_id='start',
    default_args=default_args,
    dag=dag,
)

def python_greeting(**kwargs):
    context = kwargs
    print("Dag: ", context['dag_run'].dag_id)
    print("Task: ", context['task'].task_id)
    print("Current Date Time: ", datetime.datetime.now())
    print('Hello Python!')

python_dag = python_operator.PythonOperator(
    task_id='python_func_call',
    python_callable=python_greeting,
    provide_context=True,
    default_args=default_args,
    dag=dag,
)

start_dag.set_downstream(python_dag)

Example no. 21
default_args = {'start_date': YESTERDAY}

dag = airflow.DAG('simple_workflow_dag',
                  default_args=default_args,
                  schedule_interval=None)

bash_operator_task = bash_operator.BashOperator(
    task_id='bash_operator_example_task',
    bash_command='echo "Hello from Airflow Bash Operator"',
    dag=dag)


def python_operator_func():
    print("Hello from Airflow Python Operator")


python_operator_task = python_operator.PythonOperator(
    task_id='python_operator_example_task',
    python_callable=python_operator_func,
    dag=dag)

kubernetes_pod_operator_task = kubernetes_pod_operator.KubernetesPodOperator(
    task_id='k8s_pod_operator_example_task',
    name='k8s_pod_example',
    namespace='default',
    image='bash',
    cmds=['echo'],
    arguments=['"Hello from Airflow Kubernetes Pod Operator"'],
    dag=dag)
Example no. 22
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
dag = DAG(
    'cathay_download_open_quiz',
    default_args=default_dag_args,
    # schedule_interval=timedelta(hours=6),
    # schedule_interval='@daily',
    schedule_interval='@once',
    is_paused_upon_creation=False)

Q1_python = python_operator.PythonOperator(
    task_id='Question_1',
    python_callable=Question_1,
    dag=dag,
)

Q2_Q3_python = python_operator.PythonOperator(
    task_id='Question_2__Question_3',
    python_callable=Question_2__Question_3,
    dag=dag,
)

Q4_python = python_operator.PythonOperator(
    task_id='Question_4',
    python_callable=Question_4,
    dag=dag,
)
"""
Simple DAG for using Airflow
"""
import datetime
import logging
from airflow import models
from airflow.operators import bash_operator
from airflow.operators import python_operator

DEFAULT_DAG_ARGS = {'start_date': datetime.datetime(2018, 1, 1)}

with models.DAG('composer_sample_greeting',
                schedule_interval=datetime.timedelta(days=1),
                default_args=DEFAULT_DAG_ARGS) as dag:

    def _hello_python():
        """
        A method here
        """
        logging.info('Hello World!')

    HELLO_PYTHON = python_operator.PythonOperator(
        task_id='HELLO_PYTHON', python_callable=_hello_python)

    GOODBYE_BASH = bash_operator.BashOperator(task_id='GOODBYE_BASH',
                                              bash_command='echo Goodbye')

    HELLO_PYTHON >> GOODBYE_BASH
Example no. 24
import datetime
from scipy import stats

from airflow import models
from airflow.operators import python_operator

yesterday = datetime.datetime.combine(
    datetime.datetime.today() - datetime.timedelta(1),
    datetime.datetime.min.time())

default_dag_args = {
    'start_date': yesterday,
    'retries': 1,
    'retry_delay': datetime.timedelta(minutes=2)
}

with models.DAG('finding_the_most_common_element',
                schedule_interval=datetime.timedelta(days=1),
                default_args=default_dag_args) as dag:

    def print_most_common_number():
        num = stats.mode(["9", "5", "2", "5", "1", "6"])
        print(num)
        return ('Successfully printed most common element!')

    printing_most_common_element = python_operator.PythonOperator(
        task_id='most_common_number', python_callable=print_most_common_number)

    printing_most_common_element
Example no. 25
                'corrected_dataset_id', 'corrected_table_name',
                'commitments_table_name', 'enable_cud_cost_attribution']
    ENV_VARS = get_env_variables(KEY_LIST)
    # Create temp tables for each of the three queries
    ENV_VARS['distribute_commitments_table'] = 'temp_distribute_commitments_table'
    ENV_VARS['project_label_credit_breakout_table'] = 'temp_project_label_credit_data_table'
    ENV_VARS['temp_commitments_table_name'] = 'temp_commitments_table'
    # Convert string to bool because environment variables are strings.
    ENV_VARS['enable_cud_cost_attribution'] = (
        ENV_VARS['enable_cud_cost_attribution'].lower() == 'true'
    )
    bq_client = bigquery.Client()

    FORMAT_COMMITMENT_TABLE = python_operator.PythonOperator(
        task_id='format_commitment_table',
        python_callable=format_commitment_table,
        op_kwargs={'env_vars': ENV_VARS}
    )

    PROJECT_LABEL_CREDIT_QUERY = python_operator.PythonOperator(
        task_id='project_label_credit_query',
        python_callable=project_label_credit,
        op_kwargs={'bq_client': bq_client, 'env_vars': ENV_VARS}
    )

    DISTRIBUTE_COMMITMENTS_QUERY = python_operator.PythonOperator(
        task_id='distribute_commitments',
        python_callable=distribute_commitments,
        op_kwargs={'bq_client': bq_client, 'env_vars': ENV_VARS})

    BILLING_OUTPUT_QUERY = python_operator.PythonOperator(
Example no. 26
    'depends_on_past': False,
    'retries': 0,
    'provide_context': True,
    'retry_delay': datetime.timedelta(minutes=5),
    'start_date': airflow.utils.dates.days_ago(1),
}

slack_dag = DAG(
    'slack_dag',
    catchup=False,
    default_args=default_args,
    on_failure_callback=task_fail_slack_alert,
    schedule_interval="@once",
)


def div_method(**kwargs):
    print(kwargs)
    nv = 0 / 0
    print(nv)


div_by_zero = python_operator.PythonOperator(
    task_id='div_by_zero',
    python_callable=div_method,
    provide_context=True,
    dag=slack_dag,
)

div_by_zero
Example no. 27
                        if DELETE_TABLES:
                            client.delete_table(source_table_ref)
                            logging.info(
                                '***** DELETE: -> Table {}:{} deleted.'.format(
                                    BQ_DATASET_NAME, each_temp))
                    else:
                        logging.info(
                            "Table [" + each_temp +
                            "] is still in streaming mode and cannot be processed!"
                            + " *** Seconds elapsed: " +
                            str(detla_in_seconds) + "/" + str(BUFFER_SECONDS) +
                            " seconds!")
                except Exception as err:
                    logging.info("--> Error Detail: " + err.message)
                    break
        logging.info("Total number of rows processed: " + str(row_counter))

    def sample_function():
        import logging
        logging.info('Hello from the sample function!')

    start_dag = python_operator.PythonOperator(task_id='Pre_Tasks',
                                               python_callable=sample_function)
    consolidation = python_operator.PythonOperator(
        task_id='BQ_Table_Consolidation',
        python_callable=run_table_consolidation)
    end_dag = python_operator.PythonOperator(task_id='Cleanup_Tasks',
                                             python_callable=sample_function)

    start_dag >> consolidation >> end_dag
Example no. 28
    # 'priority_weight': 10,
}

# Define a DAG (directed acyclic graph) of tasks.
# Any task you create within the context manager is automatically added to the
# DAG object.
with models.DAG(
        'bindexis_end2end',
        schedule_interval=datetime.timedelta(days=1), # or in cron Format
        default_args=default_dag_args) as dag:

    # An instance of an operator is called a task. In this case, the
    # bindexis_python task calls the bindexis_dataload Python function.
    bindexis_python = python_operator.PythonOperator(
        task_id='bindexis-dataload-start',
        python_callable=def_bindexis_dataload.bindexis_dataload,
        op_kwargs={'user_bindexis': Variable.get("user_bindexis"),
                    'pw_bindexis': Variable.get("password_bindexis")},
        retries=2)

    # Likewise, the end_bash task runs a Bash command.
    end_bash = bash_operator.BashOperator(
        task_id='bindexis-end',
        bash_command='echo bindexis-dataload-end.')

    # Define the order in which the tasks complete by using the >> and <<
    # operators. In this example, bindexis_python executes before end_bash.
    bindexis_python >> end_bash


# Send email confirmation
#email_summary = EmailOperator(
Example no. 29
        trigger_dag(dag_id=dag_to_trigger,
                    run_id='{}_{}'.format(file_name, uuid4()),
                    conf=json.dumps({'file': file_name}),
                    execution_date=None,
                    replace_microseconds=False)

        files_triggered.append(file_name)

    logger.info('triggered %s for %s files: %s' % (dag_to_trigger, len(files_triggered), files_triggered))


dag = DAG('trigger_process_zip_dag',
          default_args=default_args,
          schedule_interval=SCHEDULE_INTERVAL)

task_1 = python_operator.PythonOperator(
    task_id='get_zip_files_to_process',
    python_callable=get_zip_files_to_process,
    dag=dag
)

task_2 = python_operator.PythonOperator(
    task_id='run_dag_for_each_file',
    provide_context=True,
    python_callable=run_dag_for_each_file,
    op_args=[process_zip_dag.dag.dag_id],
    dag=dag
)

task_1.set_downstream(task_2)
Example no. 30
    APITags = DataFlowPythonOperator(py_file=pipeline_api_tags,
                                     options={
                                         'input': tags_path,
                                         'temp_location': temp_bucket,
                                         'project': project
                                     },
                                     task_id='apicallpipeline')

    # Comments and Answers reports

    sql = 'SELECT question_id FROM `{0}.{1}` WHERE creation_date >= TIMESTAMP("{2}")'.format(
        dataset, table_question, yesterday_dash_string)

    Query = python_operator.PythonOperator(task_id='Query',
                                           python_callable=QueryToGCS,
                                           op_kwargs={'sql': sql})

    CommentsExport = python_operator.PythonOperator(
        task_id='CommentsExport', python_callable=CommentsToGCS)
    AnswerExport = python_operator.PythonOperator(task_id='AnswerExport',
                                                  python_callable=AnswersToGCS)

    comment_file = '{}_{}.json'.format(comment_export, yesterday_string)
    answer_file = '{}_{}.json'.format(answer_export, yesterday_string)

    CommentToGCS = GoogleCloudStorageToGoogleCloudStorageOperator(
        task_id="Comment_to_GSC",
        source_bucket=source_bucket,
        source_object="data/{}".format(comment_file),
        destination_bucket=destination_bucket,