def test_poke_exception(self):
    """
    An exception raised in the poke function should not be ignored.
    """
    def resp_check(resp):
        raise AirflowException('AirflowException raised here!')

    task = HttpSensor(
        task_id='http_sensor_poke_exception',
        http_conn_id='http_default',
        endpoint='',
        params={},
        response_check=resp_check,
        poke_interval=5)

    with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'):
        task.execute(None)
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    params={},
    response_check=lambda response: "Google" in response.content,
    poke_interval=5,
    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
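# The snippet above chains sensor -> t1 -> ... -> t5 but only shows t3 and t4.
# A hedged sketch of what the missing tasks could look like; the task ids,
# endpoints and payloads below are illustrative placeholders, not recovered
# from the original DAG.
t1 = SimpleHttpOperator(
    task_id='post_op',
    method='POST',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag)

t2 = SimpleHttpOperator(
    task_id='get_op',
    method='GET',
    endpoint='api/v1.0/nodes',
    data={"check": "true"},
    headers={},
    dag=dag)

t5 = SimpleHttpOperator(
    task_id='post_op_formenc',
    method='POST',
    endpoint='api/v1.0/nodes',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)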
"key": "country", "value": "Germany" }] } } }), headers=headers, xcom_push=True, dag=dag, ) wait_for_dataprep_job_to_complete = HttpSensor( task_id='wait_for_dataprep_job_to_complete', endpoint= '/v4/jobGroups/{{ json.loads(ti.xcom_pull(task_ids="run_dataprep_job"))["id"] }}?embed=jobs.errorMessage', headers=headers, response_check=check_dataprep_run_complete, poke_interval=10, dag=dag, ) bigquery_run_sql = BigQueryOperator( task_id='bq_run_sql', use_legacy_sql=False, write_disposition='WRITE_TRUNCATE', allow_large_results=True, bql=''' #standardsql SELECT stories.score AS stories_score, COUNT(stories.id) AS stories_count
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('druid-ingest-covid', default_args=default_args)
dag.doc_md = __doc__

check_data = HttpSensor(
    task_id='covid-data-check',
    http_conn_id='http_default',
    endpoint='{{ macros.ds_format(ds, "%Y-%m-%d", "%d-%m-%Y") }}.csv',
    params={},
    response_check=lambda response: response.status_code == 200,
    poke_interval=5,
    dag=dag)

def post_task(ds):
    endpoint = macros.ds_format(ds, "%Y-%m-%d", "%d-%m-%Y") + '.csv'
    http_conn_host = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'
    url = http_conn_host + endpoint
    with open('spec.json') as f:
        druid_spec = json.load(f)
    druid_spec['spec']['ioConfig']['inputSource']['uris'] = [url]
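# The snippet above is truncated. A hedged sketch (not the original continuation)
# of how the assembled druid_spec could be submitted for ingestion: the Overlord
# host below is a placeholder, and /druid/indexer/v1/task is Druid's native batch
# ingestion endpoint.
import requests

def submit_to_druid(druid_spec, overlord_url='http://druid-overlord:8081'):
    resp = requests.post(
        overlord_url + '/druid/indexer/v1/task',
        json=druid_spec,
        headers={'Content-Type': 'application/json'},
    )
    resp.raise_for_status()
    # The response body contains the id of the submitted Druid indexing task.
    return resp.json()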
dag = DAG('mentions_data', default_args=default_args, schedule_interval='*/15 * * * *')

# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = PythonOperator(
    task_id='print_date',
    provide_context=True,
    python_callable=get_date,
    dag=dag)

sensor = HttpSensor(
    task_id='check_for_new_dump',
    http_conn_id='http_default',
    method='HEAD',
    poke_interval=5,
    timeout=15 * 60,
    endpoint="{{ ti.xcom_pull(task_ids='print_date') }}.mentions.CSV.zip",
    dag=dag)

t2 = BashOperator(
    task_id='producer',
    bash_command="python /usr/local/kafka/airflow_producer_mentions.py {{ ti.xcom_pull(task_ids='print_date') }}",
    retries=3,
    dag=dag)

t3 = BashOperator(
    task_id='consumer',
    bash_command='python /usr/local/kafka/airflow_consumer_mentions.py',
    retries=3,
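# A hedged sketch of the get_date callable referenced by the PythonOperator above;
# it is not part of the original snippet. It assumes the dump name is the most
# recent 15-minute timestamp (matching the */15 schedule), returned so downstream
# templates can fetch it via ti.xcom_pull(task_ids='print_date').
def get_date(**context):
    execution_date = context['execution_date']
    # Round down to the 15-minute boundary and format as YYYYMMDDHHMMSS.
    rounded = execution_date.replace(
        minute=execution_date.minute - execution_date.minute % 15,
        second=0,
        microsecond=0)
    return rounded.strftime('%Y%m%d%H%M%S')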
def sensor_factory(sid, config, dagdict):
    # Per-sensor-type defaults; the caller's config is overlaid on top of them.
    default_paras = {
        'TimeSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'hour': 0,
            'minute': 0
        },
        'SqlSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'sql': '',
            'conn_id': ''
        },
        'HivePartitionSensor': {
            'poke_interval': 60 * 5,
            'timeout': 60 * 60 * 24,
            'table': '',
            'partition': '',
            'metastore_conn_id': '',
            'schema': 'default'
        },
        'HdfsSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'filepath': '',
            'hdfs_conn_id': 'hdfs_default'
        },
        'HttpSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'endpoint': '',
            'http_conn_id': 'http_default',
            'params': None,
            'headers': None,
            'response_check': None
        }
    }

    c = default_paras[config['type']]
    c.update(config)
    sensor_type = c['type']
    dag = dagdict[c['dag_id']]

    # Instantiate the requested sensor type with the merged settings.
    if sensor_type == 'TimeSensor':
        target_time = time(c['hour'], c['minute'])
        return TimeSensor(target_time=target_time, task_id=sid, dag=dag,
                          poke_interval=c['poke_interval'], timeout=c['timeout'])
    elif sensor_type == 'SqlSensor':
        return SqlSensor(sql=c['sql'], conn_id=c['conn_id'], task_id=sid, dag=dag,
                         poke_interval=c['poke_interval'], timeout=c['timeout'])
    elif sensor_type == 'HivePartitionSensor':
        return HivePartitionSensor(table=c['table'], partition=c['partition'],
                                   schema=c['schema'],
                                   metastore_conn_id=c['metastore_conn_id'],
                                   task_id=sid, dag=dag,
                                   poke_interval=c['poke_interval'], timeout=c['timeout'])
    elif sensor_type == 'HdfsSensor':
        return HdfsSensor(task_id=sid, dag=dag, filepath=c['filepath'],
                          hdfs_conn_id=c['hdfs_conn_id'],
                          poke_interval=c['poke_interval'], timeout=c['timeout'])
    elif sensor_type == 'HttpSensor':
        return HttpSensor(task_id=sid, dag=dag, endpoint=c['endpoint'],
                          http_conn_id=c['http_conn_id'],
                          poke_interval=c['poke_interval'], timeout=c['timeout'])
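# A hedged usage sketch for sensor_factory; the DAG id, connection id and
# endpoint below are placeholders, not taken from the original code.
from datetime import datetime
from airflow import DAG

example_dag = DAG('example_dag', start_date=datetime(2021, 1, 1), schedule_interval='@daily')

health_check = sensor_factory(
    sid='wait_for_api',
    config={
        'type': 'HttpSensor',
        'dag_id': 'example_dag',
        'http_conn_id': 'http_default',
        'endpoint': 'health',
        'poke_interval': 30,
    },
    dagdict={'example_dag': example_dag},
)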
    params={},
    retries=1,
    dag=dag)

t1 = SSHExecuteOperator(
    task_id='verify_transfer_to_remote',
    ssh_hook=sssh_hook_01,
    bash_command=AIRFLOW_HOME + '/dags/echo_date.sh',
    params={},
    retries=1,
    dag=dag)

domain01_sensor = HttpSensor(
    task_id='domain01_sensor',
    endpoint='',
    http_conn_id='http_domain01',
    retries=1,
    params={},
    dag=dag)

domain02_sensor = HttpSensor(
    task_id='domain02_sensor',
    endpoint='',
    http_conn_id='http_domain02',
    retries=1,
    params={},
    dag=dag)

domain03_sensor = HttpSensor(
    task_id='domain03_sensor',
    endpoint='',
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='api/v1.0/apps',
    params={},
    headers={"Content-Type": "application/json"},
    response_check=lambda response: "collation" in response.content,
    poke_interval=5,
    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2018, 7, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('lin1000_domain_checker', default_args=default_args,
          schedule_interval="* * * * *")

domain01_sensor = HttpSensor(
    task_id='lin1000_domain_sensor',
    endpoint='',
    http_conn_id='lin1000_domain_http',
    retries=1,
    params={},
    dag=dag)

dummy_operator = DummyOperator(
    task_id='dummy_task',
    dag=dag,
)

dummy_operator.set_upstream(domain01_sensor)
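# A hedged note, not part of the original DAG: the sensor above relies on an HTTP
# connection named 'lin1000_domain_http'. Besides the UI, Airflow can pick it up
# from an AIRFLOW_CONN_<CONN_ID> environment variable holding a connection URI;
# the host below is a placeholder and would normally be set in the scheduler and
# worker environments rather than in the DAG file.
import os

os.environ['AIRFLOW_CONN_LIN1000_DOMAIN_HTTP'] = 'http://lin1000.example.com'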