Example #1
    def test_logging_head_error_request(
        self,
        mock_session_send
    ):

        def resp_check(resp):
            return True

        import requests
        response = requests.Response()
        response.status_code = 404
        response.reason = 'Not Found'
        mock_session_send.return_value = response

        task = HttpSensor(
            dag=self.dag,
            task_id='http_sensor_head_method',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            method='HEAD',
            response_check=resp_check,
            timeout=5,
            poke_interval=1
        )

        with mock.patch.object(task.hook.log, 'error') as mock_errors:
            with self.assertRaises(AirflowSensorTimeout):
                task.execute(None)

            self.assertTrue(mock_errors.called)
            mock_errors.assert_called_with('HTTP error: %s', 'Not Found')
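
Examples 1-4 are test methods whose harness is not shown: mock_session_send is injected by patching requests.Session.send at the class level. A minimal sketch of what that scaffolding might look like (Airflow 1.10-era import paths; names are illustrative):

import unittest
from datetime import datetime
from unittest import mock

from airflow import DAG
from airflow.exceptions import AirflowSensorTimeout  # used by the test bodies
from airflow.sensors.http_sensor import HttpSensor  # used by the test bodies


@mock.patch('requests.Session.send')
class TestHttpSensor(unittest.TestCase):
    def setUp(self):
        # A minimal DAG for the sensor to attach to.
        self.dag = DAG('http_sensor_test', start_date=datetime(2021, 1, 1))

    # The test methods shown in these examples would live here; the
    # class-level patch injects mock_session_send as the trailing argument
    # of every test method.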
Example #2
    def test_logging_head_error_request(self, mock_session_send):
        def resp_check(resp):
            return True

        import requests
        response = requests.Response()
        response.status_code = 404
        response.reason = 'Not Found'
        mock_session_send.return_value = response

        task = HttpSensor(dag=self.dag,
                          task_id='http_sensor_head_method',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          method='HEAD',
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)

        with mock.patch.object(task.hook.log, 'error') as mock_errors:
            with self.assertRaises(AirflowSensorTimeout):
                task.execute(None)

            self.assertTrue(mock_errors.called)
            mock_errors.assert_called_with('HTTP error: %s', 'Not Found')
Example #3
    def test_head_method(self, mock_session_send):
        def resp_check(resp):
            return True

        task = HttpSensor(
            dag=self.dag,
            task_id='http_sensor_head_method',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            method='HEAD',
            response_check=resp_check,
            timeout=5,
            poke_interval=1)

        import requests
        task.execute(None)

        args, kwargs = mock_session_send.call_args
        received_request = args[0]

        prep_request = requests.Request(
            'HEAD',
            'https://www.google.com',
            {}).prepare()

        self.assertEqual(prep_request.url, received_request.url)
        self.assertEqual(prep_request.method, received_request.method)
Example #4
    def test_head_method(self, mock_session_send):
        def resp_check(resp):
            return True

        task = HttpSensor(dag=self.dag,
                          task_id='http_sensor_head_method',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          method='HEAD',
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)

        import requests
        task.execute(None)

        args, kwargs = mock_session_send.call_args
        received_request = args[0]

        prep_request = requests.Request('HEAD', 'https://www.google.com',
                                        {}).prepare()

        self.assertEqual(prep_request.url, received_request.url)
        self.assertEqual(prep_request.method, received_request.method)
Example #5
def run_flow_and_wait_for_completion():
    run_flow_task = SimpleHttpOperator(
        task_id='run_flow',
        endpoint='/v4/jobGroups',
        data=json.dumps({
            "wrangledDataset": {
                "id": int(recipe_id)
            },
            "runParameters": {
                "overrides": {
                    "data": [{
                        "key": "region",
                        "value": str(region)
                    }]
                }
            }
        }),
        headers=headers,
        xcom_push=True,
        dag=dag,
    )

    wait_for_flow_run_to_complete = HttpSensor(
        task_id='wait_for_flow_run_to_complete',
        endpoint='/v4/jobGroups/{{ json.loads(ti.xcom_pull(task_ids="run_flow"))["id"] }}?embed=jobs.errorMessage',
        headers=headers,
        response_check=check_flow_run_complete,
        poke_interval=10,
        dag=dag,
    )

    run_flow_task.set_downstream(wait_for_flow_run_to_complete)

    return wait_for_flow_run_to_complete
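
check_flow_run_complete is referenced above but not shown. A hypothetical sketch, assuming the polled jobGroups endpoint reports a status field with values such as 'Complete' and 'Failed':

def check_flow_run_complete(response):
    # Assumed payload shape: {"status": "Complete" | "Failed" | ...}
    status = response.json().get('status')
    if status == 'Failed':
        # Raising makes the sensor fail instead of poking forever.
        raise ValueError('Flow run failed: {}'.format(response.text))
    return status == 'Complete'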
Example #6
    def test_poke_exception(self):
        """
        An exception raised in the poke function should not be ignored.
        """
        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(
            task_id='http_sensor_poke_exception',
            http_conn_id='http_default',
            endpoint='',
            params={},
            response_check=resp_check,
            poke_interval=5)
        with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'):
            task.execute(None)
Example #7
    def test_poke_exception(self):
        """
        An exception raised in the poke function should not be ignored.
        """
        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(task_id='http_sensor_poke_exception',
                          http_conn_id='http_default',
                          endpoint='',
                          params={},
                          response_check=resp_check,
                          poke_interval=5)
        with self.assertRaisesRegexp(AirflowException,
                                     'AirflowException raised here!'):
            task.execute(None)
Example #8
def create_subdag(default_args, subdag_id, job_param_dict, timeout):
    subdag = DAG(dag_id=subdag_id,
                 default_args=default_args,
                 schedule_interval=None,
                 catchup=False)

    trigger_job_http_op = SimpleHttpOperator(
        task_id='http_post_to_databricks',
        http_conn_id='databricks',
        endpoint='/api/2.0/jobs/run-now',
        method='POST',
        headers={'Content-Type': 'application/json'},
        data=json.dumps(job_param_dict),
        xcom_push=True,
        response_check=lambda response: response.json().get('run_id') is not None,
        dag=subdag)

    run_id_extractor = PythonOperator(task_id='extract_run_id',
                                      provide_context=True,
                                      python_callable=extract_run_id,
                                      dag=subdag)

    state_http_sensor = HttpSensor(
        task_id='sensor_job_state',
        http_conn_id='databricks',
        timeout=timeout,
        method='GET',
        endpoint='/api/2.0/jobs/runs/get',
        request_params={
            'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""
        },
        response_check=check_state,
        poke_interval=30,
        dag=subdag)

    fetch_result_http_op = SimpleHttpOperator(
        task_id='http_get_to_databricks',
        http_conn_id='databricks',
        method='GET',
        data={'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""},
        endpoint='/api/2.0/jobs/runs/get-output',
        xcom_push=True,
        response_check=lambda response: response.json()['metadata']['state'].get('result_state') == 'SUCCESS',
        dag=subdag)

    result_extractor = PythonOperator(task_id='extract_result',
                                      provide_context=True,
                                      python_callable=extract_result,
                                      dag=subdag)

    trigger_job_http_op >> run_id_extractor >> state_http_sensor >> fetch_result_http_op >> result_extractor

    return subdag
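
The helpers extract_run_id and check_state are assumed rather than shown. A sketch, assuming the Databricks run-now response carries a run_id and the runs/get payload follows the documented {"state": {"life_cycle_state": ..., "result_state": ...}} shape:

import json


def extract_run_id(**context):
    # SimpleHttpOperator with xcom_push=True pushes the raw response text;
    # pull it from the trigger task and return just the run_id, which lands
    # in XCom as this task's return_value.
    response_text = context['ti'].xcom_pull(task_ids='http_post_to_databricks')
    return json.loads(response_text)['run_id']


def check_state(response):
    # Poke succeeds once the run has terminated; fail fast on a bad result.
    state = response.json()['state']
    if state.get('result_state') == 'FAILED':
        raise ValueError('Databricks run failed: {}'.format(state))
    return state.get('life_cycle_state') == 'TERMINATED'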
Example #9
# [START howto_operator_http_task_put_op]
task_put_op = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='put',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)
# [END howto_operator_http_task_put_op]
# [START howto_operator_http_task_del_op]
task_del_op = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_del_op]
# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    dag=dag,
)
# [END howto_operator_http_http_sensor_check]
task_http_sensor_check >> task_post_op >> task_get_op >> task_get_op_response_filter
task_get_op_response_filter >> task_put_op >> task_del_op >> task_post_op_formenc
Example #10
        'geonames_endpoint': 'export/dump/allCountries.zip',
    },
    schedule_interval='30 1 * * 0',
    tags=['k8s', 'nemo', 'psc', 'egg'],
)
# [END instantiate_dag]

with pipeline:

    # [START task_http_geonames_org_sensor_check]
    task_http_geonames_org_sensor_check = HttpSensor(
        task_id='http_geonames_org_sensor_check',
        http_conn_id='http_geonames_org',
        endpoint='{{ params.geonames_endpoint }}',
        method='HEAD',
        response_check=lambda response: response.ok,
        poke_interval=2,
        # Extra options for the ‘requests’ library, see the ‘requests’ documentation (options to modify timeout, ssl, etc.)
        extra_options={
            'verify': False,
        },
    )

    # [START task_http_egg_svc_check]
    task_http_egg_svc_check = KubernetesPodOperator(
        namespace='processing',
        name='dea-access-egg-svc-check',
        task_id='http_egg_svc_sensor_check',
        image_pull_policy='IfNotPresent',
        image=CURL_SVC_IMAGE,
        is_delete_operator_pod=True,
        arguments=["--verbose", "http://{{ params.egg_svc_name }}:9200"],
Example #11
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
    # 'concurrency': 4
}

dag = DAG('extract_github_commits',
          default_args=default_args,
          schedule_interval='@daily')
hdfs_dir = 'hdfs://10.0.0.9:9000'

# Check if the new dump exists yet, retry every hour until it does
check_for_new_dump = HttpSensor(task_id='check_for_new_dump',
                                http_conn_id='ghtorrent',
                                method='HEAD',
                                poke_interval=60 * 60,
                                timeout=60 * 60 * 24,
                                endpoint="""mongo-dump-{{ ds }}.tar.gz""",
                                dag=dag)

# Download the bson file
download = BashOperator(task_id='download',
                        bash_command="""
wget -qO- http://ghtorrent-downloads.ewi.tudelft.nl/mongo-daily/mongo-dump-{{ ds }}.tar.gz | tar xvz dump/github/commits.bson --strip-components=2
wait
mv commits.bson ~/staging/commits_{{ ds }}.bson
    """,
                        params={'hdfs_dir': hdfs_dir},
                        dag=dag)

# this extracts the bson file
Example #12
t3 = SimpleHttpOperator(task_id='put_op',
                        method='PUT',
                        endpoint='api/v1.0/nodes',
                        data=json.dumps({"priority": 5}),
                        headers={"Content-Type": "application/json"},
                        dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(task_id='http_sensor_check',
                    http_conn_id='http_default',
                    endpoint='',
                    params={},
                    response_check=lambda response: "Google" in response.text,
                    poke_interval=5,
                    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
Example #13
#     http_conn_id='rest-connection',
#     endpoint="/update?id={empId}".format(empId = Variable.get("id")),
#     method="PUT",
#     headers={"Content-Type": "application/json"},
#     response_filter=lambda response: response.json(),
#     xcom_push=True,
#     dag=dag,
# )

# [END howto_operator_http_task_del_op]
# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='api_health_check',
    http_conn_id=conn_id,
    endpoint='/',
    request_params={},
    # response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    # on_failure_callback=notify_email,
    dag=dag,
)

# Task 3: Save JSON data locally
# save_and_transform = PythonOperator(
#     task_id="save_and_transform",
#     python_callable=transform_json,
#     provide_context=True,
# )

save_employee = PythonOperator(task_id="save_employee_transform",
                               python_callable=save_emp_json,
                               provide_context=True)
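
save_emp_json is referenced above but not defined in the excerpt. A hypothetical sketch, assuming an upstream HTTP task pushed the employee payload to XCom (the task id and file path here are invented for illustration):

import json


def save_emp_json(**context):
    # Hypothetical: pull the JSON string pushed by an upstream HTTP task
    # and persist it locally.
    payload = context['ti'].xcom_pull(task_ids='get_employee')  # task id assumed
    with open('/tmp/employee.json', 'w') as fh:
        json.dump(json.loads(payload), fh)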
Example #14
    task_id='cms_data_pull',
    python_callable=cdp.run_cms_data_pull,
    op_kwargs={"website_link": "data.cms.gov",
               "token": None,
               "dataset_identifier": "xbte-dn4t",
               "crawl_limit": 5000,
               "db_url_full": constants.LOCAL_DB_URL,
               "db_url": "airflow_works",
               "schema": "sandbox",
               "table_name": "cms_drug_file"},
    dag=dag_game_1)

s1 = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: True if "Google" in response.text else False,
    dag=dag_game_1,
)

s2 = HttpSensor(
    task_id='cms_http_sensor',
    http_conn_id='cms_gov_http_id',
    endpoint='',
    request_params={},
    dag=dag_game_1,
)


t2.set_upstream(t1)
t1.set_upstream(s1)
Example #15
    xcom_push=True,
    dag=dag)

# retrieve the job id associated with the async call in t1
t2 = PythonOperator(
    task_id='weekly_dbm_advertiser_sync_jobid',
    python_callable=setSyncEndPoint,
    provide_context=True,
    dag=dag
)

t3 = HttpSensor(
    task_id='weekly_dbm_advertiser_sync_status',
    http_conn_id='i2ap_processor',
    endpoint=Variable.get('weekly_dbm_advertiser_sync-statusEndpoint'),
    headers={"Content-Type": "application/json",
             "Tt-I2ap-Id": "*****@*****.**",
             "Tt-I2ap-Sec": "E8OLhEWWihzdpIz5"},
    response_check=responseCheck,
    poke_interval=60,
    dag=dag)

# Make the asynchronous call to the i2ap data job
t4 = SimpleHttpOperator(
    task_id='weekly_dbm_partner_pull',
    endpoint='/Partner',
    method='POST',
    data=json.dumps({"start-date": startDate,
                     "end-date": endDate,
                     "restrict": "True",
                     "history": "False",
                     "version": Variable.get('weekly_dbm_partner_pull-version')}),
Example #16
                             python_callable=print_context,
                             dag=dag)

qubole_task = QuboleOperator(
    task_id='qubole_task',
    command_type='shellcmd',
    script='ls /usr/lib/airflow',
    cluster_label='airflow-demo',
    # If True, fetch the Qubole command logs and concatenate them into the
    # corresponding Airflow task logs.
    fetch_logs=True,
    # Qubole auto-attaches three tags to each command: dag_id, task_id, run_id.
    # Connection id used to submit commands inside QDS; "qubole_default" if not set.
    qubole_conn_id='qubole_default',
    dag=dag)

bash_task = BashOperator(
    task_id='bash_task',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag)

http_sensor_task = HttpSensor(task_id='http_sensor_task',
                              http_conn_id='http_default',
                              endpoint='',
                              request_params={},
                              response_check=lambda response: "Google" in str(response.content),
                              poke_interval=5,
                              dag=dag)

qubole_task.set_upstream(python_task)
bash_task.set_upstream(python_task)
http_sensor_task.set_upstream(python_task)
Example #17
from datetime import datetime

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime(2019, 1, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False
}

dag = DAG('workshop_airflow_exo_2',
          default_args=default_args,
          schedule_interval="0 6 * * *")

wait_for_right_time = HttpSensor(
    task_id='wait_for_right_time',
    http_conn_id='navitia',
    endpoint='journeys?from=2.2728894%3B48.8812988&to=2.2950275%3B48.8737917&',
    headers={'Authorization': '9cdfa8dd-4ed8-4411-a6eb-690d361fddf6'},
    response_check=check_if_time_to_leave,
    dag=dag)

send_mail = EmailOperator(task_id='send_mail',
                          to=['*****@*****.**'],
                          subject="You need to leave now!",
                          html_content="Leave now if you want to be on time!",
                          dag=dag)

wait_for_right_time >> send_mail
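
check_if_time_to_leave is not shown. A hypothetical sketch, assuming the Navitia journeys payload exposes departure_date_time in its compact 'YYYYMMDDTHHMMSS' format and a five-minute threshold:

from datetime import datetime, timedelta


def check_if_time_to_leave(response):
    # Poke succeeds once the next journey departs within five minutes (assumed).
    departure_str = response.json()['journeys'][0]['departure_date_time']
    departure = datetime.strptime(departure_str, '%Y%m%dT%H%M%S')
    return departure - datetime.now() <= timedelta(minutes=5)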
Example #18
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag_x = DAG("segmentstream_demo",
            default_args=default_args,
            schedule_interval=timedelta(days=1))

initial_task = DummyOperator(task_id='start', dag=dag_x)

wait_for_currency_rates_service = HttpSensor(
    task_id="wait_for_currency_rates_service",
    dag=dag_x,
    http_conn_id='currency_service',
    method='GET',
    endpoint='get_rates',
    headers={"Content-Type": "application/json"},
    request_params={'date': datetime.now().strftime('%d.%m.%Y')},
    response_check=check_currency_response,
)
wait_for_currency_rates_service << initial_task

get_daily_conversion_rates = PythonOperator(
    task_id="get_daily_conversion_rates",
    python_callable=get_daily_conversion_rates_callback,
    provide_context=True,
    dag=dag_x)

get_daily_conversion_rates.set_upstream(
    task_or_task_list=wait_for_currency_rates_service)
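
check_currency_response is an assumed helper. A minimal sketch that treats the service as ready once it returns JSON containing rates:

def check_currency_response(response):
    # Hypothetical shape: {"rates": {"USD": ..., "EUR": ...}, "date": ...}
    try:
        return bool(response.json().get('rates'))
    except ValueError:
        # Not JSON yet; keep poking.
        return False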
Example #19
    data=json.dumps(job_param_dict),
    xcom_push=True,
    response_check=lambda response: response.json().get('run_id') is not None,
    dag=dag)

run_id_extractor = PythonOperator(task_id='extract_run_id',
                                  provide_context=True,
                                  python_callable=extract_run_id,
                                  dag=dag)

state_http_sensor = HttpSensor(
    task_id='sensor_job_state',
    http_conn_id='databricks',
    timeout=timeout,
    method='GET',
    endpoint='/api/2.0/jobs/runs/get',
    request_params={
        'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""
    },
    response_check=check_state,
    poke_interval=30,
    dag=dag)

fetch_result_http_op = SimpleHttpOperator(
    task_id='http_get_to_databricks',
    http_conn_id='databricks',
    method='GET',
    data={'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""},
    endpoint='/api/2.0/jobs/runs/get-output',
    xcom_push=True,
    response_check=lambda response: response.json()['metadata']['state'].get('result_state') == 'SUCCESS',