Example 1
    def test_poke_exception(self):
        """
        Exception occurs in poke function should not be ignored.
        """
        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(task_id='http_sensor_poke_exception',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          response_check=resp_check,
                          poke_interval=5)
        with self.assertRaisesRegex(AirflowException,
                                    'AirflowException raised here!'):
            task.execute(None)
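For comparison, a response_check that simply returns False keeps the sensor poking at each poke_interval until it times out, whereas raising AirflowException (as resp_check does above) fails the task immediately. A minimal sketch of the non-failing variant, using the same http_default connection (the task id below is new):

def resp_check_keep_poking(resp):
    # Returning False makes the sensor poke again after poke_interval
    # seconds instead of failing the task.
    return resp.status_code == 200

retrying_task = HttpSensor(
    task_id='http_sensor_keep_poking',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=resp_check_keep_poking,
    poke_interval=5,
)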
# [START howto_operator_http_task_put_op]
task_put_op = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='put',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)
# [END howto_operator_http_task_put_op]
# [START howto_operator_http_task_del_op]
task_del_op = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_del_op]
# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    dag=dag,
)
# [END howto_operator_http_http_sensor_check]
task_http_sensor_check >> task_post_op >> task_get_op >> task_get_op_response_filter
task_get_op_response_filter >> task_put_op >> task_del_op >> task_post_op_formenc
        'geonames_endpoint': 'export/dump/allCountries.zip',
    },
    schedule_interval='30 1 * * 0',
    tags=['k8s', 'nemo', 'psc', 'egg'],
)
# [END instantiate_dag]

with pipeline:

    # [START task_http_geonames_org_sensor_check]
    task_http_geonames_org_sensor_check = HttpSensor(
        task_id='http_geonames_org_sensor_check',
        http_conn_id='http_geonames_org',
        endpoint='{{ params.geonames_endpoint }}',
        method='HEAD',
        response_check=lambda response: response.ok,
        poke_interval=2,
        # Extra options passed to the 'requests' library (e.g. timeout, SSL verification).
        extra_options={
            'verify': False,
        },
    )
    # [END task_http_geonames_org_sensor_check]

    # [START task_http_egg_svc_check]
    task_http_egg_svc_check = KubernetesPodOperator(
        namespace='processing',
        name='dea-access-egg-svc-check',
        task_id='http_egg_svc_sensor_check',
        image_pull_policy='IfNotPresent',
        image=CURL_SVC_IMAGE,
        is_delete_operator_pod=True,
        arguments=["--verbose", "http://{{ params.egg_svc_name }}:9200"],
Example 4
t3 = SimpleHttpOperator(task_id='put_op',
                        method='PUT',
                        endpoint='api/v1.0/nodes',
                        data=json.dumps({"priority": 5}),
                        headers={"Content-Type": "application/json"},
                        dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(task_id='http_sensor_check',
                    http_conn_id='http_default',
                    endpoint='',
                    request_params={},
                    response_check=lambda response: "Google" in response.text,
                    poke_interval=5,
                    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
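The set_upstream chain above declares the same dependencies that Airflow's bitshift operators express in a single line:

sensor >> t1 >> t2 >> t3 >> t4 >> t5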
Example 5
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
    # 'concurrency': 4
}

dag = DAG('extract_github_commits',
          default_args=default_args,
          schedule_interval='@daily')
hdfs_dir = 'hdfs://10.0.0.9:9000'

# Check if the new dump exists yet, retry every hour until it does
check_for_new_dump = HttpSensor(task_id='check_for_new_dump',
                                http_conn_id='ghtorrent',
                                method='HEAD',
                                poke_interval=60 * 60,
                                timeout=60 * 60 * 24,
                                endpoint="""mongo-dump-{{ ds }}.tar.gz""",
                                dag=dag)
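With poke_interval=60 * 60 and timeout=60 * 60 * 24, this sensor issues a HEAD request for the dump once an hour and fails the task if the file has not appeared within 24 hours.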

# Download the bson file
download = BashOperator(task_id='download',
                        bash_command="""
wget -qO- http://ghtorrent-downloads.ewi.tudelft.nl/mongo-daily/mongo-dump-{{ ds }}.tar.gz | tar xvz dump/github/commits.bson --strip-components=2
wait
mv commits.bson ~/staging/commits_{{ ds }}.bson
    """,
                        params={'hdfs_dir': hdfs_dir},
                        dag=dag)

# this extracts the bson file
#     http_conn_id='rest-connection',
#     endpoint="/update?id={empId}".format(empId = Variable.get("id")),
#     method="PUT",
#     headers={"Content-Type": "application/json"},
#     response_filter=lambda response: response.json(),
#     xcom_push=True,
#     dag=dag,
# )

# [END howto_operator_http_task_del_op]
# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='api_health_check',
    http_conn_id=conn_id,
    endpoint='/',
    request_params={},
    # response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    # on_failure_callback=notify_email,
    dag=dag,
)

# Task 3: Save JSON data locally
# save_and_transform = PythonOperator(
#     task_id="save_and_transform",
#     python_callable=transform_json,
#     provide_context=True,
# )

save_employee = PythonOperator(task_id="save_employee_transform",
                               python_callable=save_emp_json,
                               provide_context=True)
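save_emp_json is not shown in this fragment; a minimal sketch of such a callable, assuming the employee payload was pushed to XCom by an upstream HTTP task (the task id 'get_employee' below is hypothetical), might look like:

import json

def save_emp_json(**context):
    # Pull the employee JSON pushed to XCom by the upstream HTTP task
    # (hypothetical task id) and persist it locally.
    employee = context['ti'].xcom_pull(task_ids='get_employee')
    with open('/tmp/employee.json', 'w') as out_file:
        json.dump(employee, out_file)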
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag_x = DAG("segmentstream_demo",
            default_args=default_args,
            schedule_interval=timedelta(days=1))

initial_task = DummyOperator(task_id='start', dag=dag_x)

wait_for_currency_rates_service = HttpSensor(
    task_id="wait_for_currency_rates_service",
    dag=dag_x,
    http_conn_id='currency_service',
    method='GET',
    endpoint='get_rates',
    headers={"Content-Type": "application/json"},
    request_params={'date': datetime.now().strftime('%d.%m.%Y')},
    response_check=check_currency_response,
)
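check_currency_response is not defined in this fragment; a minimal sketch, which only assumes the service answers with a JSON body (the exact payload shape is not shown), could be:

def check_currency_response(response):
    # Succeed only when the rates service returns a parseable, non-empty
    # JSON document; otherwise keep poking.
    try:
        return bool(response.json())
    except ValueError:
        return False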
wait_for_currency_rates_service << initial_task

get_daily_conversion_rates = PythonOperator(
    task_id="get_daily_conversion_rates",
    python_callable=get_daily_conversion_rates_callback,
    provide_context=True,
    dag=dag_x)

get_daily_conversion_rates.set_upstream(
    task_or_task_list=wait_for_currency_rates_service)
Example 8
from datetime import datetime

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime(2019, 1, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False
}

dag = DAG('workshop_airflow_exo_2',
          default_args=default_args,
          schedule_interval="0 6 * * *")

wait_for_right_time = HttpSensor(
    task_id='wait_for_right_time',
    http_conn_id='navitia',
    endpoint='journeys?from=2.2728894%3B48.8812988&to=2.2950275%3B48.8737917&',
    headers={'Authorization': '9cdfa8dd-4ed8-4411-a6eb-690d361fddf6'},
    response_check=check_if_time_to_leave,
    dag=dag)
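check_if_time_to_leave is not shown here; a minimal sketch, treating the Navitia journeys payload fields as assumptions, might compare the first proposed departure against the current time:

from datetime import datetime

def check_if_time_to_leave(response):
    # 'journeys' and 'departure_date_time' are assumptions about the payload.
    journeys = response.json().get('journeys', [])
    if not journeys:
        return False
    departure = datetime.strptime(journeys[0]['departure_date_time'],
                                  '%Y%m%dT%H%M%S')
    # Tell the sensor to succeed (and trigger the email) once departure
    # is less than ten minutes away.
    return (departure - datetime.now()).total_seconds() < 10 * 60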

send_mail = EmailOperator(task_id='send_mail',
                          to=['*****@*****.**'],
                          subject="You need to leave now!",
                          html_content="Leave now if you want to be on time!",
                          dag=dag)

wait_for_right_time >> send_mail
Example 9
    xcom_push=True,
    dag=dag)

# retrieve the job id associated with the async call in t1
t2 = PythonOperator(
    task_id='weekly_dbm_advertiser_sync_jobid',
    python_callable=setSyncEndPoint,
    provide_context=True,
    dag=dag
)

t3 = HttpSensor(
    task_id='weekly_dbm_advertiser_sync_status',
    http_conn_id='i2ap_processor',
    endpoint=Variable.get('weekly_dbm_advertiser_sync-statusEndpoint'),
    headers={"Content-Type": "application/json",
             "Tt-I2ap-Id": "*****@*****.**",
             "Tt-I2ap-Sec": "E8OLhEWWihzdpIz5"},
    response_check=responseCheck,
    poke_interval=60,
    dag=dag)
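responseCheck is not included in this fragment; a minimal sketch, assuming the sync-status endpoint returns JSON with a hypothetical 'status' field, could be:

def responseCheck(response):
    # 'status' and 'complete' are hypothetical names; the real i2ap payload
    # is not shown in this snippet.
    return response.json().get('status') == 'complete'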

# Make the asynchronous call to the i2ap data job
t4 = SimpleHttpOperator(
    task_id='weekly_dbm_partner_pull',
    endpoint='/Partner',
    method='POST',
    data=json.dumps({"start-date": startDate,
                     "end-date": endDate,
                     "restrict": "True",
                     "history": "False",
                     "version": Variable.get('weekly_dbm_partner_pull-version')}),
    task_id='cms_data_pull',
    python_callable=cdp.run_cms_data_pull,
    op_kwargs={"website_link": "data.cms.gov",
               "token": None,
               "dataset_identifier": "xbte-dn4t",
               "crawl_limit": 5000,
               "db_url_full": constants.LOCAL_DB_URL,
               "db_url": "airflow_works",
               "schema": "sandbox",
               "table_name": "cms_drug_file"},
    dag=dag_game_1)

s1 = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: True if "Google" in response.text else False,
    dag=dag_game_1,
)

s2 = HttpSensor(
    task_id='cms_http_sensor',
    http_conn_id='cms_gov_http_id',
    endpoint='',
    request_params={},
    dag=dag_game_1,
)
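Note that s2 defines no response_check; by default an HttpSensor then succeeds as soon as the endpoint answers without an HTTP error (a 404 simply triggers another poke), so it effectively acts as a reachability check on the cms_gov_http_id connection.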


t2.set_upstream(t1)
t1.set_upstream(s1)
Example 11
    data=json.dumps(job_param_dict),
    xcom_push=True,
    response_check=lambda response: response.json().get('run_id') is not None,
    dag=dag)

run_id_extractor = PythonOperator(task_id='extract_run_id',
                                  provide_context=True,
                                  python_callable=extract_run_id,
                                  dag=dag)
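extract_run_id is not shown in this fragment; a minimal sketch, assuming the upstream submit task (its id is not visible here, so 'submit_job' is hypothetical) pushed the raw Databricks response to XCom, might be:

import json

def extract_run_id(**context):
    # Pull the raw submit response pushed by the upstream HTTP task
    # (hypothetical task id); the returned run_id is itself pushed to XCom
    # and read by the templated request_params/data fields below.
    response_text = context['ti'].xcom_pull(task_ids='submit_job')
    return json.loads(response_text)['run_id']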

state_http_sensor = HttpSensor(
    task_id='sensor_job_state',
    http_conn_id='databricks',
    timeout=timeout,
    method='GET',
    endpoint='/api/2.0/jobs/runs/get',
    request_params={
        'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""
    },
    response_check=check_state,
    poke_interval=30,
    dag=dag)
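check_state is likewise not shown; a minimal sketch against the Databricks runs/get response (field names follow the public API but should be treated as assumptions here) could be:

def check_state(response):
    # Keep poking while the run is pending or running; succeed once the
    # run reaches a terminal life cycle state. The downstream task then
    # verifies that result_state is SUCCESS.
    state = response.json().get('state', {})
    return state.get('life_cycle_state') in ('TERMINATED', 'SKIPPED', 'INTERNAL_ERROR')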

fetch_result_http_op = SimpleHttpOperator(
    task_id='http_get_to_databricks',
    http_conn_id='databricks',
    method='GET',
    data={'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""},
    endpoint='/api/2.0/jobs/runs/get-output',
    xcom_push=True,
    response_check=lambda response: response.json()['metadata']['state'].get(
        'result_state') == 'SUCCESS',