def test_response_in_logs_after_failed_check(self, m):
    """
    Test that when using SimpleHttpOperator with log_response=True,
    the response is logged even if response_check fails
    """

    def response_check(response):
        return response.text != 'invalid response'

    m.get('http://www.example.com', text='invalid response')
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_EXAMPLE',
        log_response=True,
        response_check=response_check,
    )

    with mock.patch.object(operator.log, 'info') as mock_info:
        with pytest.raises(AirflowException):
            operator.execute({})
        calls = [mock.call('Calling HTTP method'), mock.call('invalid response')]
        mock_info.assert_has_calls(calls, any_order=True)
def test_get(self):
    op = SimpleHttpOperator(
        task_id='get_op',
        method='GET',
        endpoint='/search',
        data={"client": "ubuntu", "q": "airflow"},
        headers={},
        dag=self.dag,
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def test_filters_response(self, m):
    m.get('http://www.example.com', json={'value': 5})
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_EXAMPLE',
        response_filter=lambda response: response.json(),
    )
    result = operator.execute({})
    assert result == {'value': 5}
def test_get_response_check(self):
    op = SimpleHttpOperator(
        task_id='get_op',
        method='GET',
        endpoint='/search',
        data={"client": "ubuntu", "q": "airflow"},
        response_check=lambda response: ("apache/airflow" in response.text),
        headers={},
        dag=self.dag,
    )
    op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
def example_dag_decorator(email: str = '*****@*****.**'):
    """
    DAG to send server IP to email.

    :param email: Email to send IP to. Defaults to [email protected].
    :type email: str
    """
    # Using default connection as it's set to httpbin.org by default
    get_ip = SimpleHttpOperator(task_id='get_ip', endpoint='get', method='GET')

    @task(multiple_outputs=True)
    def prepare_email(raw_json: str) -> Dict[str, str]:
        external_ip = json.loads(raw_json)['origin']
        return {
            'subject': f'Server connected from {external_ip}',
            'body': f'Seems like today your server executing Airflow is connected from IP {external_ip}<br>',
        }

    email_info = prepare_email(get_ip.output)

    EmailOperator(
        task_id='send_email', to=email, subject=email_info['subject'], html_content=email_info['body']
    )
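# In the upstream Airflow example this function is registered as a DAG via the
# @dag decorator plus a trailing call; a minimal sketch of that registration
# (schedule_interval, start_date, and tags are assumed, not from this snippet):
#
#     from airflow.decorators import dag
#
#     @dag(schedule_interval=None, start_date=days_ago(2), tags=['example'])
#     def example_dag_decorator(email: str = '*****@*****.**'):
#         ...
#
#     example_dag = example_dag_decorator()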
def read_from_rest():
    return SimpleHttpOperator(
        task_id=TASK_DATA_API_CALL,
        http_conn_id=CONNECTION_ID,
        method="GET",
        endpoint="/",
        # data="{\"id\":111333222}",
        headers={"Content-Type": "application/json"},
        # response will be pushed to xcom with COLLABORATION_TASK_ID
        xcom_push=True,
        log_response=True,
    )
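# Because the operator above pushes the response to XCom, a downstream task can
# read it back with xcom_pull. A minimal sketch, assuming the same
# TASK_DATA_API_CALL constant and a hypothetical handle_response task:
from airflow.operators.python import PythonOperator

def _handle_response(ti):
    # Returns the response text pushed by the HTTP task above
    response_text = ti.xcom_pull(task_ids=TASK_DATA_API_CALL)
    print(response_text)

handle_response = PythonOperator(
    task_id="handle_response",
    python_callable=_handle_response,
)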
def test_response_in_logs(self, m):
    """
    Test that when using SimpleHttpOperator with 'GET' and log_response=True,
    the mocked response text 'Example.com fake response' appears in the log
    """
    m.get('http://www.example.com', text='Example.com fake response')
    operator = SimpleHttpOperator(
        task_id='test_HTTP_op',
        method='GET',
        endpoint='/',
        http_conn_id='HTTP_EXAMPLE',
        log_response=True,
    )

    with mock.patch.object(operator.log, 'info') as mock_info:
        operator.execute(None)
        calls = [mock.call('Example.com fake response')]
        mock_info.assert_has_calls(calls)
def nyc_taxi_dataset_dag():
    check_file = SimpleHttpOperator(
        method='HEAD',
        endpoint='yellow_tripdata_{{ execution_date.strftime("%Y-%m") }}.csv',
        task_id='check_file',
        http_conn_id='nyc_yellow_taxi_id',
    )

    @task
    def download_file():
        context = get_current_context()
        return download_dataset(context['execution_date'].strftime('%Y-%m'))

    @task
    def to_parquet(file_path: str):
        context = get_current_context()
        return convert_to_parquet(context['execution_date'].strftime('%Y-%m'), file_path)

    file_path = download_file()
    parquet_file_path = to_parquet(file_path)

    check_file >> file_path >> parquet_file_path
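# download_dataset and convert_to_parquet are referenced above but not defined
# in this snippet. A minimal sketch of plausible implementations (assumed, not
# the original code; the URL host and /tmp paths are placeholders, and
# to_parquet requires pyarrow or fastparquet to be installed):
import requests
import pandas as pd

def download_dataset(year_month: str) -> str:
    # Stream the monthly CSV to a local file and return its path
    url = f'https://example.com/yellow_tripdata_{year_month}.csv'
    file_path = f'/tmp/yellow_tripdata_{year_month}.csv'
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=8192):
            f.write(chunk)
    return file_path

def convert_to_parquet(year_month: str, file_path: str) -> str:
    # Re-encode the CSV as Parquet and return the new path
    parquet_path = f'/tmp/yellow_tripdata_{year_month}.parquet'
    pd.read_csv(file_path).to_parquet(parquet_path)
    return parquet_path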
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'purchase_order_http_operator',
    default_args=default_args,
    tags=['purchase order'],
    start_date=days_ago(2),
)

dag.doc_md = __doc__

# task_post_op, task_get_op and task_put_op are examples of tasks created by instantiating operators
# [START howto_operator_http_task_post_op]
task_post_op = SimpleHttpOperator(
    task_id='post_op',
    endpoint='https://stateset.network:8080/api/stateset/createPurchaseOrder',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: response.json()['json']['priority'] == 5,
    dag=dag,
)
# [END howto_operator_http_task_post_op]

# [START howto_operator_http_task_post_op_formenc]
task_post_op_formenc = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='https://stateset.network:8080/api/stateset/createPurchaseOrder',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_post_op_formenc]

# [START howto_operator_http_task_get_op]
task_get_op = SimpleHttpOperator(
            firstname TEXT NOT NULL,
            lastname TEXT NOT NULL,
            country TEXT NOT NULL,
            username TEXT NOT NULL,
            password TEXT NOT NULL,
            email TEXT NOT NULL PRIMARY KEY
        );
        ''')

    is_api_available = HttpSensor(task_id='is_api_available', http_conn_id='user_api', endpoint='api/')

    extracting_user = SimpleHttpOperator(
        task_id='extracting_user',
        http_conn_id='user_api',
        endpoint='api/',
        method='GET',
        response_filter=lambda response: json.loads(response.text),
        log_response=True,
    )

    processing_user = PythonOperator(task_id='processing_user', python_callable=_processing_user)

    storing_user = BashOperator(
        task_id='storing_user',
        bash_command='echo -e ".separator ","\n.import /tmp/processed_user.csv users" | sqlite3 /home/airflow/airflow/airflow.db',
    )

    creating_table >> is_api_available >> extracting_user >> processing_user >> storing_user
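# _processing_user is referenced above but not defined in this snippet. A
# minimal sketch of what it might do (the payload shape is assumed, in the
# style of randomuser.me): pull the JSON pushed by extracting_user, flatten
# the first record, and write the /tmp/processed_user.csv that storing_user
# imports into SQLite:
import pandas as pd

def _processing_user(ti):
    users = ti.xcom_pull(task_ids='extracting_user')
    if not users or 'results' not in users:
        raise ValueError('User is empty')
    user = users['results'][0]
    processed_user = pd.json_normalize({
        'firstname': user['name']['first'],
        'lastname': user['name']['last'],
        'country': user['location']['country'],
        'username': user['login']['username'],
        'password': user['login']['password'],
        'email': user['email'],
    })
    processed_user.to_csv('/tmp/processed_user.csv', index=None, header=False)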
            lastname TEXT NOT NULL,
            country TEXT NOT NULL,
            username TEXT NOT NULL,
            password TEXT NOT NULL,
            email TEXT NOT NULL PRIMARY KEY
        );
        """)

    is_api_available = HttpSensor(task_id="is_api_available", http_conn_id="user_api", endpoint="api/")

    extracting_user = SimpleHttpOperator(
        task_id="extracting_user",
        http_conn_id="user_api",
        endpoint="api/",
        method="GET",
        response_filter=lambda response: json.loads(response.text),
        log_response=False,
    )

    processing_user = PythonOperator(task_id="processing_user", python_callable=_processing_user)

    storing_user = BashOperator(
        task_id="storing_user",
        bash_command='echo -e ".separator ","\n.import /tmp/processed_user.csv users" | sqlite3 /home/airflow/airflow/airflow.db',
    )

    creating_table >> is_api_available >> extracting_user >> processing_user >> storing_user
import airflow.utils.dates
from airflow.models import DAG
from airflow.providers.http.operators.http import SimpleHttpOperator

dag = DAG(
    dag_id="secretsbackend_with_vault",
    start_date=airflow.utils.dates.days_ago(1),
    schedule_interval=None,
)

call_api = SimpleHttpOperator(
    task_id="call_api",
    http_conn_id="secure_api",
    method="GET",
    endpoint="",
    log_response=True,
    dag=dag,
)
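# For call_api to resolve the "secure_api" connection from Vault, the Vault
# secrets backend must be enabled. A minimal sketch of that configuration
# (the mount point, path, and URL are illustrative; adjust them to your Vault
# deployment), set via environment variables:
#
#   AIRFLOW__SECRETS__BACKEND=airflow.providers.hashicorp.secrets.vault.VaultBackend
#   AIRFLOW__SECRETS__BACKEND_KWARGS='{"connections_path": "connections", "mount_point": "airflow", "url": "http://vault:8200"}'
#
# The connection would then live in Vault under airflow/connections/secure_api.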
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('example_http_operator', default_args=default_args, tags=['example'])

dag.doc_md = __doc__

# t1, t2 and t3 are examples of tasks created by instantiating operators
t1 = SimpleHttpOperator(
    task_id='post_op',
    endpoint='post',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: response.json()['json']['priority'] == 5,
    dag=dag,
)

t5 = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='post',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)

t2 = SimpleHttpOperator(
    task_id='get_op',
    method='GET',
""" with DAG( "simplehttp01", schedule_interval="@once", start_date=days_ago(0), default_args={}, tags=["http"], doc_md=_dag_doc_md, ) as dag: dum1 = DummyOperator(task_id="http1") rest_call_ex01 = SimpleHttpOperator( task_id="rest-call-ex01", http_conn_id="http-disease.sh", endpoint="/v3/covid-19/nyt/states", method="GET", log_response=True, ) @task def transform(multiple_outputs=True) -> Sequence: """ #### Transform task A simple Transform task which takes in the collection of order data and computes the total order value. """ context = get_current_context() ti = context["ti"] data: Sequence = ti.xcom_pull(task_ids="rest-call-ex01", key="return_value")
from utils import load_df_into_db

with DAG(
    dag_id='paypal_daily',
    start_date=datetime(2019, 1, 1),
    schedule_interval='@daily',
    tags=['transactions', 'rest-api', 'private', 'elt'],
) as dag:
    task_extract_transactions = SimpleHttpOperator(
        endpoint='/v1/reporting/transactions',
        method='GET',
        data={
            'start_date': '{{ execution_date.isoformat() }}',
            'end_date': '{{ next_execution_date.isoformat() }}',
        },
        headers={'Content-Type': 'application/json'},
        http_conn_id='http_paypal',
        task_id='extract_transactions',
    )

    def load_transactions(data: str) -> None:
        # Read data from xcom
        paypal_transactions = data
        # Load data into db
        load_df_into_db(
            data_frame=pd.DataFrame({
                'VALUE': [paypal_transactions],
    user_map = {
        'firstname': user['name']['first'],
        'lastname': user['name']['last'],
    }
    processed_user = json.dumps(user_map)
    Variable.set("user", processed_user)


with DAG('user_data_processing', schedule_interval='@daily', default_args=default_args, catchup=False) as dag:

    is_api_available = HttpSensor(task_id='is_api_available', http_conn_id='user_api', endpoint='api/')

    fetch_user = SimpleHttpOperator(task_id='fetch_user', http_conn_id='user_api', endpoint='api/', method='GET')

    processing_user = PythonOperator(task_id='processing_user', python_callable=_processing_user)

    print_user = BashOperator(
        task_id='log_user',
        bash_command='echo "{{ var.value.user }} {{ params.customer_key }}"',
        params={"customer_key": "value"},
    )

    is_api_available >> fetch_user >> processing_user >> print_user
    );
    """,
)

check_api_available = HttpSensor(
    dag=dag,
    task_id="check_api_available",
    http_conn_id=_API_NAME,
    endpoint="api/",
)

extract_user = SimpleHttpOperator(
    dag=dag,
    task_id="extract_user",
    http_conn_id=_API_NAME,
    endpoint="api/",
    method="GET",
    response_filter=lambda response: json.loads(response.text),
    log_response=True,
)

process_user = PythonOperator(
    dag=dag,
    task_id="process_user",
    python_callable=_process_user,
)

store_user = BashOperator(
    dag=dag,
    task_id="store_user",
    bash_command=f"""
import os
import datetime as dt

import requests

from airflow import DAG
from airflow.decorators import task
from airflow.providers.http.operators.http import SimpleHttpOperator

with DAG(dag_id='titanic_dag', start_date=dt.datetime(2021, 3, 1), schedule_interval='@once') as dag:
    check_if_file_exists = SimpleHttpOperator(
        method='HEAD',
        task_id='check_file_existence',
        http_conn_id='web_stanford_http_id',
        endpoint='/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv',
    )

    @task
    def download_titanic_dataset():
        url = 'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv'
        response = requests.get(url, stream=True)
        response.raise_for_status()
        filepath = os.path.join(os.path.expanduser('~'), 'titanic.csv')
        with open(filepath, 'w', encoding='utf-8') as f:
            for chunk in response.iter_lines():
                f.write('{}\n'.format(chunk.decode('utf-8')))
        return filepath

    @task
    task_id='remove_temp_file',
    bash_command=removetempfile,
    dag=dag,
)

## change to suit your setting
t_analytics = LocalFilesystemToGCSOperator(
    task_id="uploadtostorage",
    src=destination_file,
    dst=gcsdir,
    bucket=GCS_BUCKET,
    gcp_conn_id=GCS_CONN_ID,
    dag=dag,
)

## change to suit your setting
t_sendresult = SimpleHttpOperator(
    task_id='sendnotification',
    method='POST',
    http_conn_id='notificationserver',
    endpoint='api/logUpdate',
    data=json.dumps({"source_file": source_file}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)

''' the dependencies among tasks '''
t_downloadlogtocloud >> t_analytics
t_analytics >> fork
fork >> t_sendresult
fork >> t_removefile
t_removefile >> join
t_sendresult >> join