def test_logging_head_error_request(self, mock_session_send):
    def resp_check(resp):
        return True

    import requests
    response = requests.Response()
    response.status_code = 404
    response.reason = 'Not Found'
    mock_session_send.return_value = response

    task = HttpSensor(
        dag=self.dag,
        task_id='http_sensor_head_method',
        http_conn_id='http_default',
        endpoint='',
        request_params={},
        method='HEAD',
        response_check=resp_check,
        timeout=5,
        poke_interval=1)

    with mock.patch.object(task.hook.log, 'error') as mock_errors:
        with self.assertRaises(AirflowSensorTimeout):
            task.execute(None)
        self.assertTrue(mock_errors.called)
        mock_errors.assert_called_with('HTTP error: %s', 'Not Found')
def test_head_method(self, mock_session_send):
    def resp_check(resp):
        return True

    import requests

    task = HttpSensor(
        dag=self.dag,
        task_id='http_sensor_head_method',
        http_conn_id='http_default',
        endpoint='',
        request_params={},
        method='HEAD',
        response_check=resp_check,
        timeout=5,
        poke_interval=1)

    task.execute(None)

    args, kwargs = mock_session_send.call_args
    received_request = args[0]

    prep_request = requests.Request(
        'HEAD', 'https://www.google.com', {}).prepare()

    self.assertEqual(prep_request.url, received_request.url)
    self.assertEqual(prep_request.method, received_request.method)
def run_flow_and_wait_for_completion():
    run_flow_task = SimpleHttpOperator(
        task_id='run_flow',
        endpoint='/v4/jobGroups',
        data=json.dumps({
            "wrangledDataset": {"id": int(recipe_id)},
            "runParameters": {
                "overrides": {
                    "data": [{"key": "region", "value": str(region)}]
                }
            }
        }),
        headers=headers,
        xcom_push=True,
        dag=dag,
    )

    wait_for_flow_run_to_complete = HttpSensor(
        task_id='wait_for_flow_run_to_complete',
        endpoint='/v4/jobGroups/{{ json.loads(ti.xcom_pull(task_ids="run_flow"))["id"] }}?embed=jobs.errorMessage',
        headers=headers,
        response_check=check_flow_run_complete,
        poke_interval=10,
        dag=dag,
    )

    run_flow_task.set_downstream(wait_for_flow_run_to_complete)

    return wait_for_flow_run_to_complete
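# The sensor above relies on a `check_flow_run_complete` callable that is not shown in
# this excerpt. An HttpSensor response_check receives the requests.Response object and
# must return a boolean. A minimal sketch, assuming the jobGroup payload exposes a
# "status" field with terminal values "Complete" and "Failed" (field name and values
# are assumptions, not taken from the excerpt):
from airflow.exceptions import AirflowException

def check_flow_run_complete(response):
    body = response.json()
    status = body.get('status')          # assumed field name
    if status == 'Failed':               # assumed terminal failure value
        raise AirflowException('Flow run failed: %s' % body)
    return status == 'Complete'          # assumed terminal success value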
def test_poke_exception(self):
    """
    An exception raised in the poke function should not be ignored.
    """
    def resp_check(resp):
        raise AirflowException('AirflowException raised here!')

    task = HttpSensor(
        task_id='http_sensor_poke_exception',
        http_conn_id='http_default',
        endpoint='',
        params={},
        response_check=resp_check,
        poke_interval=5)

    with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'):
        task.execute(None)
def create_subdag(default_args, subdag_id, job_param_dict, timeout):
    subdag = DAG(
        dag_id=subdag_id,
        default_args=default_args,
        schedule_interval=None,
        catchup=False)

    trigger_job_http_op = SimpleHttpOperator(
        task_id='http_post_to_databricks',
        http_conn_id='databricks',
        endpoint='/api/2.0/jobs/run-now',
        method='POST',
        headers={'Content-Type': 'application/json'},
        data=json.dumps(job_param_dict),
        xcom_push=True,
        response_check=lambda response: response.json().get('run_id') is not None,
        dag=subdag)

    run_id_extractor = PythonOperator(
        task_id='extract_run_id',
        provide_context=True,
        python_callable=extract_run_id,
        dag=subdag)

    state_http_sensor = HttpSensor(
        task_id='sensor_job_state',
        http_conn_id='databricks',
        timeout=timeout,
        method='GET',
        endpoint='/api/2.0/jobs/runs/get',
        request_params={
            'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""
        },
        response_check=check_state,
        poke_interval=30,
        dag=subdag)

    fetch_result_http_op = SimpleHttpOperator(
        task_id='http_get_to_databricks',
        http_conn_id='databricks',
        method='GET',
        data={'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""},
        endpoint='/api/2.0/jobs/runs/get-output',
        xcom_push=True,
        response_check=lambda response: response.json()['metadata']['state'].get('result_state') == 'SUCCESS',
        dag=subdag)

    result_extractor = PythonOperator(
        task_id='extract_result',
        provide_context=True,
        python_callable=extract_result,
        dag=subdag)

    trigger_job_http_op >> run_id_extractor >> state_http_sensor >> fetch_result_http_op >> result_extractor

    return subdag
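# `check_state` is referenced above but not defined in this excerpt. A minimal sketch of
# such a response_check against the Databricks /api/2.0/jobs/runs/get payload, assuming
# the standard "state.life_cycle_state" / "state.result_state" fields; this is an
# illustration, not the author's implementation:
from airflow.exceptions import AirflowException

def check_state(response):
    # Keep poking while the run is still in a non-terminal life cycle state.
    state = response.json()['state']
    if state.get('life_cycle_state') in ('PENDING', 'RUNNING', 'TERMINATING'):
        return False
    if state.get('result_state') != 'SUCCESS':
        raise AirflowException('Databricks run did not succeed: %s' % state)
    return True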
    task_id='put_op',
    method='PUT',
    endpoint='put',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)
# [END howto_operator_http_task_put_op]

# [START howto_operator_http_task_del_op]
task_del_op = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_del_op]

# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    dag=dag,
)
# [END howto_operator_http_http_sensor_check]

task_http_sensor_check >> task_post_op >> task_get_op >> task_get_op_response_filter
task_get_op_response_filter >> task_put_op >> task_del_op >> task_post_op_formenc
        'geonames_endpoint': 'export/dump/allCountries.zip',
    },
    schedule_interval='30 1 * * 0',
    tags=['k8s', 'nemo', 'psc', 'egg'],
)
# [END instantiate_dag]

with pipeline:
    # [START task_http_geonames_org_sensor_check]
    task_http_geonames_org_sensor_check = HttpSensor(
        task_id='http_geonames_org_sensor_check',
        http_conn_id='http_geonames_org',
        endpoint='{{ params.geonames_endpoint }}',
        method='HEAD',
        response_check=lambda response: True if response.ok else False,
        poke_interval=2,
        # Extra options for the 'requests' library; see the 'requests' documentation
        # (options to modify timeout, ssl, etc.)
        extra_options={
            'verify': False,
        },
    )

    # [START task_http_egg_svc_check]
    task_http_egg_svc_check = KubernetesPodOperator(
        namespace='processing',
        name='dea-access-egg-svc-check',
        task_id='http_egg_svc_sensor_check',
        image_pull_policy='IfNotPresent',
        image=CURL_SVC_IMAGE,
        is_delete_operator_pod=True,
        arguments=["--verbose", "http://{{ params.egg_svc_name }}:9200"],
    'email_on_retry': False,
    'retries': 5,
    'retry_delay': timedelta(minutes=5)
    # 'concurrency': 4
}

dag = DAG('extract_github_commits', default_args=default_args, schedule_interval='@daily')

hdfs_dir = 'hdfs://10.0.0.9:9000'

# Check if the new dump exists yet; retry every hour until it does
check_for_new_dump = HttpSensor(
    task_id='check_for_new_dump',
    http_conn_id='ghtorrent',
    method='HEAD',
    poke_interval=60 * 60,
    timeout=60 * 60 * 24,
    endpoint="""mongo-dump-{{ ds }}.tar.gz""",
    dag=dag)

# Download the bson file
download = BashOperator(
    task_id='download',
    bash_command="""
    wget -qO- http://ghtorrent-downloads.ewi.tudelft.nl/mongo-daily/mongo-dump-{{ ds }}.tar.gz | tar xvz dump/github/commits.bson --strip-components=2
    wait
    mv commits.bson ~/staging/commits_{{ ds }}.bson
    """,
    params={'hdfs_dir': hdfs_dir},
    dag=dag)

# this extracts the bson file
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    params={},
    # use response.text (decoded str) so the substring check works on Python 3
    response_check=lambda response: True if "Google" in response.text else False,
    poke_interval=5,
    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
#     http_conn_id='rest-connection',
#     endpoint="/update?id={empId}".format(empId=Variable.get("id")),
#     method="PUT",
#     headers={"Content-Type": "application/json"},
#     response_filter=lambda response: response.json(),
#     xcom_push=True,
#     dag=dag,
# )
# [END howto_operator_http_task_del_op]

# [START howto_operator_http_http_sensor_check]
task_http_sensor_check = HttpSensor(
    task_id='api_health_check',
    http_conn_id=conn_id,
    endpoint='/',
    request_params={},
    # response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    # on_failure_callback=notify_email,
    dag=dag,
)

# Task 3: Save JSON data locally
# save_and_transform = PythonOperator(
#     task_id="save_and_transform",
#     python_callable=transform_json,
#     provide_context=True,
# )

save_employee = PythonOperator(
    task_id="save_employee_transform",
    python_callable=save_emp_json,
    provide_context=True)
    task_id='cms_data_pull',
    python_callable=cdp.run_cms_data_pull,
    op_kwargs={
        "website_link": "data.cms.gov",
        "token": None,
        "dataset_identifier": "xbte-dn4t",
        "crawl_limit": 5000,
        "db_url_full": constants.LOCAL_DB_URL,
        "db_url": "airflow_works",
        "schema": "sandbox",
        "table_name": "cms_drug_file",
    },
    dag=dag_game_1)

s1 = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: True if "Google" in response.text else False,
    dag=dag_game_1,
)

s2 = HttpSensor(
    task_id='cms_http_sensor',
    http_conn_id='cms_gov_http_id',
    endpoint='',
    request_params={},
    dag=dag_game_1,
)

t2.set_upstream(t1)
t1.set_upstream(s1)
    xcom_push=True,
    dag=dag)

# retrieve the job id associated with the async call in t1
t2 = PythonOperator(
    task_id='weekly_dbm_advertiser_sync_jobid',
    python_callable=setSyncEndPoint,
    provide_context=True,
    dag=dag)

t3 = HttpSensor(
    task_id='weekly_dbm_advertiser_sync_status',
    http_conn_id='i2ap_processor',
    endpoint=Variable.get('weekly_dbm_advertiser_sync-statusEndpoint'),
    headers={"Content-Type": "application/json",
             "Tt-I2ap-Id": "*****@*****.**",
             "Tt-I2ap-Sec": "E8OLhEWWihzdpIz5"},
    response_check=responseCheck,
    poke_interval=60,
    dag=dag)

# Make the asynchronous call to the i2ap data job
t4 = SimpleHttpOperator(
    task_id='weekly_dbm_partner_pull',
    endpoint='/Partner',
    method='POST',
    data=json.dumps({"start-date": startDate,
                     "end-date": endDate,
                     "restrict": "True",
                     "history": "False",
                     "version": Variable.get('weekly_dbm_partner_pull-version')}),
    python_callable=print_context,
    dag=dag)

qubole_task = QuboleOperator(
    task_id='qubole_task',
    command_type='shellcmd',
    script='ls /usr/lib/airflow',
    cluster_label='airflow-demo',
    # If true, will fetch qubole command logs and concatenate them into
    # the corresponding airflow task logs
    fetch_logs=True,
    # To attach tags to the qubole command, auto attach 3 tags - dag_id, task_id, run_id.
    # Connection id used to submit commands inside QDS; if not set, "qubole_default" is used.
    qubole_conn_id='qubole_default',
    dag=dag)

bash_task = BashOperator(
    task_id='bash_task',
    bash_command='echo "run_id={{ run_id }} | dag_run={{ dag_run }}"',
    dag=dag)

http_sensor_task = HttpSensor(
    task_id='http_sensor_task',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: True if "Google" in str(response.content) else False,
    poke_interval=5,
    dag=dag)

qubole_task.set_upstream(python_task)
bash_task.set_upstream(python_task)
http_sensor_task.set_upstream(python_task)
from datetime import datetime

default_args = {
    'owner': 'airflow',
    'depends_on_past': True,
    'start_date': datetime(2019, 1, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False
}

dag = DAG('workshop_airflow_exo_2', default_args=default_args, schedule_interval="0 6 * * *")

wait_for_right_time = HttpSensor(
    task_id='wait_for_right_time',
    http_conn_id='navitia',
    endpoint='journeys?from=2.2728894%3B48.8812988&to=2.2950275%3B48.8737917&',
    headers={'Authorization': '9cdfa8dd-4ed8-4411-a6eb-690d361fddf6'},
    response_check=check_if_time_to_leave,
    dag=dag)

send_mail = EmailOperator(
    task_id='send_mail',
    to=['*****@*****.**'],
    subject="You need to leave now!",
    html_content="Leave now if you want to be on time!",
    dag=dag)

wait_for_right_time >> send_mail
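# `check_if_time_to_leave` is referenced above but not shown in this excerpt. A minimal
# sketch, assuming the Navitia journeys payload exposes journeys[0]['departure_date_time']
# as a '%Y%m%dT%H%M%S' timestamp and that "time to leave" means the suggested departure is
# less than ten minutes away (field layout and threshold are assumptions):
from datetime import datetime, timedelta

def check_if_time_to_leave(response):
    journeys = response.json().get('journeys', [])
    if not journeys:
        return False
    departure = datetime.strptime(journeys[0]['departure_date_time'], '%Y%m%dT%H%M%S')
    # Let the sensor succeed once the next suggested departure is imminent.
    return departure - datetime.now() <= timedelta(minutes=10)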
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag_x = DAG("segmentstream_demo", default_args=default_args, schedule_interval=timedelta(days=1))

initial_task = DummyOperator(task_id='start', dag=dag_x)

wait_for_currency_rates_service = HttpSensor(
    task_id="wait_for_currency_rates_service",
    dag=dag_x,
    http_conn_id='currency_service',
    method='GET',
    endpoint='get_rates',
    headers={"Content-Type": "application/json"},
    request_params={'date': datetime.now().strftime('%d.%m.%Y')},
    response_check=check_currency_response,
)

wait_for_currency_rates_service << initial_task

get_daily_conversion_rates = PythonOperator(
    task_id="get_daily_conversion_rates",
    python_callable=get_daily_conversion_rates_callback,
    provide_context=True,
    dag=dag_x)

get_daily_conversion_rates.set_upstream(
    task_or_task_list=wait_for_currency_rates_service)
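# `check_currency_response` is referenced above but not defined in this excerpt. A minimal
# sketch that only verifies the service returned a usable JSON payload, assuming the rates
# live under a "rates" key (the key name is an assumption):
def check_currency_response(response):
    payload = response.json()
    # Keep poking until the service actually returns rates for the requested date.
    return bool(payload.get('rates'))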
    data=json.dumps(job_param_dict),
    xcom_push=True,
    response_check=lambda response: response.json().get('run_id') is not None,
    dag=dag)

run_id_extractor = PythonOperator(
    task_id='extract_run_id',
    provide_context=True,
    python_callable=extract_run_id,
    dag=dag)

state_http_sensor = HttpSensor(
    task_id='sensor_job_state',
    http_conn_id='databricks',
    timeout=timeout,
    method='GET',
    endpoint='/api/2.0/jobs/runs/get',
    request_params={
        'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""
    },
    response_check=check_state,
    poke_interval=30,
    dag=dag)

fetch_result_http_op = SimpleHttpOperator(
    task_id='http_get_to_databricks',
    http_conn_id='databricks',
    method='GET',
    data={'run_id': """{{ ti.xcom_pull(task_ids='extract_run_id') }}"""},
    endpoint='/api/2.0/jobs/runs/get-output',
    xcom_push=True,
    response_check=lambda response: response.json()['metadata']['state'].get('result_state') == 'SUCCESS',