Example #1
    def test_response_in_logs_after_failed_check(self, m):
        """
        Test that when using SimpleHttpOperator with log_response=True,
        the response is logged even if response_check fails
        """
        def response_check(response):
            return response.text != 'invalid response'

        m.get('http://www.example.com', text='invalid response')
        operator = SimpleHttpOperator(
            task_id='test_HTTP_op',
            method='GET',
            endpoint='/',
            http_conn_id='HTTP_EXAMPLE',
            log_response=True,
            response_check=response_check,
        )

        with mock.patch.object(operator.log, 'info') as mock_info:
            with pytest.raises(AirflowException):
                operator.execute({})
            calls = [
                mock.call('Calling HTTP method'),
                mock.call('invalid response')
            ]
            mock_info.assert_has_calls(calls, any_order=True)
Example #2
 def test_get(self):
     op = SimpleHttpOperator(
         task_id='get_op',
         method='GET',
         endpoint='/search',
         data={"client": "ubuntu", "q": "airflow"},
         headers={},
         dag=self.dag,
     )
     op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #3
 def test_filters_response(self, m):
     m.get('http://www.example.com', json={'value': 5})
     operator = SimpleHttpOperator(
         task_id='test_HTTP_op',
         method='GET',
         endpoint='/',
         http_conn_id='HTTP_EXAMPLE',
         response_filter=lambda response: response.json(),
     )
     result = operator.execute({})
     assert result == {'value': 5}
Example #4
 def test_get_response_check(self):
     op = SimpleHttpOperator(
         task_id='get_op',
         method='GET',
         endpoint='/search',
         data={"client": "ubuntu", "q": "airflow"},
         response_check=lambda response: ("apache/airflow" in response.text),
         headers={},
         dag=self.dag,
     )
     op.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example #5
def example_dag_decorator(email: str = '*****@*****.**'):
    """
    DAG to send server IP to email.

    :param email: Email to send IP to. Defaults to [email protected].
    :type email: str
    """
    # Using the default HTTP connection, as it points to httpbin.org by default
    get_ip = SimpleHttpOperator(task_id='get_ip', endpoint='get', method='GET')

    @task(multiple_outputs=True)
    def prepare_email(raw_json: str) -> Dict[str, str]:
        external_ip = json.loads(raw_json)['origin']
        return {
            'subject': f'Server connected from {external_ip}',
            'body': f'Seems like today your server executing Airflow is connected from IP {external_ip}<br>',
        }

    email_info = prepare_email(get_ip.output)

    EmailOperator(task_id='send_email',
                  to=email,
                  subject=email_info['subject'],
                  html_content=email_info['body'])
Example #6
 def read_from_rest():
     return SimpleHttpOperator(
         task_id=TASK_DATA_API_CALL,
         http_conn_id=CONNECTION_ID,
         method="GET",
         endpoint="/",
         # data="{\"id\":111333222}",
         headers={"Content-Type": "application/json"},
         # the response is pushed to XCom under this task's id
         xcom_push=True,
         log_response=True,
     )
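Since xcom_push=True stores the response body as the task's return_value, a downstream consumer can pull it by task id. A hedged sketch (the task and callable names are illustrative, not from the original module):

from airflow.operators.python import PythonOperator

def _print_api_response(ti):
    # with no key given, xcom_pull returns the return_value XCom,
    # i.e. the response text pushed by the SimpleHttpOperator above
    response_text = ti.xcom_pull(task_ids=TASK_DATA_API_CALL)
    print(response_text)

print_response = PythonOperator(
    task_id='print_api_response',
    python_callable=_print_api_response,
)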
Example #7
    def test_response_in_logs(self, m):
        """
        Test that when using SimpleHttpOperator with 'GET' and
        log_response=True, the mocked response text appears in the log
        """

        m.get('http://www.example.com', text='Example.com fake response')
        operator = SimpleHttpOperator(
            task_id='test_HTTP_op',
            method='GET',
            endpoint='/',
            http_conn_id='HTTP_EXAMPLE',
            log_response=True,
        )

        with mock.patch.object(operator.log, 'info') as mock_info:
            operator.execute(None)
            calls = [mock.call('Example.com fake response')]
            mock_info.assert_has_calls(calls)
Example #8
def nyc_taxi_dataset_dag():

    check_file = SimpleHttpOperator(
        method='HEAD',
        endpoint='yellow_tripdata_{{ execution_date.strftime("%Y-%m") }}.csv',
        task_id='check_file',
        http_conn_id='nyc_yellow_taxi_id')

    @task
    def download_file():
        context = get_current_context()
        return download_dataset(context['execution_date'].strftime('%Y-%m'))

    @task
    def to_parquet(file_path: str):
        context = get_current_context()
        return convert_to_parquet(context['execution_date'].strftime('%Y-%m'),
                                  file_path)

    file_path = download_file()
    parquet_file_path = to_parquet(file_path)

    check_file >> file_path >> parquet_file_path
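download_dataset and convert_to_parquet are project helpers imported elsewhere; a plausible sketch of the first, assuming the same file-naming scheme as the templated HEAD check above (the base URL and /tmp path are assumptions):

import requests

# placeholder host: in the DAG itself the real host comes from the
# 'nyc_yellow_taxi_id' connection
NYC_TAXI_BASE_URL = 'https://example.com'

def download_dataset(year_month: str) -> str:
    url = f'{NYC_TAXI_BASE_URL}/yellow_tripdata_{year_month}.csv'
    file_path = f'/tmp/yellow_tripdata_{year_month}.csv'
    response = requests.get(url, stream=True)
    response.raise_for_status()
    with open(file_path, 'wb') as f:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            f.write(chunk)
    return file_path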
Example #9
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('purchase_order_http_operator', default_args=default_args, tags=['purchase order'], start_date=days_ago(2))

dag.doc_md = __doc__

# task_post_op, task_get_op and task_put_op are examples of tasks created by instantiating operators
# [START howto_operator_http_task_post_op]
task_post_op = SimpleHttpOperator(
    task_id='post_op',
    endpoint='https://stateset.network:8080/api/stateset/createPurchaseOrder',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: response.json()['json']['priority'] == 5,
    dag=dag,
)
# [END howto_operator_http_task_post_op]
# [START howto_operator_http_task_post_op_formenc]
task_post_op_formenc = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='https://stateset.network:8080/api/stateset/createPurchaseOrder',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)
# [END howto_operator_http_task_post_op_formenc]
# [START howto_operator_http_task_get_op]
task_get_op = SimpleHttpOperator(
Example #10
                firstname TEXT NOT NULL,
                lastname TEXT NOT NULL,
                country TEXT NOT NULL,
                username TEXT NOT NULL,
                password TEXT NOT NULL,
                email TEXT NOT NULL PRIMARY KEY
            );
        ''')

    is_api_available = HttpSensor(task_id='is_api_available',
                                  http_conn_id='user_api',
                                  endpoint='api/')

    extracting_user = SimpleHttpOperator(
        task_id='extracting_user',
        http_conn_id='user_api',
        endpoint='api/',
        method='GET',
        response_filter=lambda response: json.loads(response.text),
        log_response=True)

    processing_user = PythonOperator(task_id='processing_user',
                                     python_callable=_processing_user)

    storing_user = BashOperator(
        task_id='storing_user',
        bash_command=
        'echo -e ".separator ","\n.import /tmp/processed_user.csv users" | sqlite3 /home/airflow/airflow/airflow.db'
    )

    creating_table >> is_api_available >> extracting_user >> processing_user >> storing_user
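_processing_user is referenced but not shown in this excerpt; a plausible sketch, assuming a randomuser.me-style JSON payload (the field paths are assumptions inferred from the users table columns above) that produces the CSV imported by storing_user:

from pandas import json_normalize

def _processing_user(ti):
    # the parsed JSON returned by extracting_user's response_filter
    users = ti.xcom_pull(task_ids='extracting_user')
    user = users['results'][0]
    processed_user = json_normalize({
        'firstname': user['name']['first'],
        'lastname': user['name']['last'],
        'country': user['location']['country'],
        'username': user['login']['username'],
        'password': user['login']['password'],
        'email': user['email'],
    })
    # no header row, so the sqlite3 .import in storing_user gets plain data
    processed_user.to_csv('/tmp/processed_user.csv', index=None, header=False)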
Example #11
                lastname TEXT NOT NULL,
                country TEXT NOT NULL,
                username TEXT NOT NULL,
                password TEXT NOT NULL,
                email TEXT NOT NULL PRIMARY KEY
            );
        """)

    is_api_available = HttpSensor(task_id="is_api_available",
                                  http_conn_id="user_api",
                                  endpoint="api/")

    extracting_user = SimpleHttpOperator(
        task_id="extracting_user",
        http_conn_id="user_api",
        endpoint="api/",
        method="GET",
        response_filter=lambda response: json.loads(response.text),
        log_response=False)

    processing_user = PythonOperator(task_id="processing_user",
                                     python_callable=_processing_user)

    storing_user = BashOperator(
        task_id="storing_user",
        bash_command=
        'echo -e ".separator ","\n.import /tmp/processed_user.csv users" | sqlite3 /home/airflow/airflow/airflow.db'
    )

    creating_table >> is_api_available >> extracting_user >> processing_user >> storing_user
Example #12
import airflow.utils.dates
from airflow.models import DAG
from airflow.providers.http.operators.http import SimpleHttpOperator

dag = DAG(
    dag_id="secretsbackend_with_vault",
    start_date=airflow.utils.dates.days_ago(1),
    schedule_interval=None,
)

call_api = SimpleHttpOperator(
    task_id="call_api",
    http_conn_id="secure_api",
    method="GET",
    endpoint="",
    log_response=True,
    dag=dag,
)
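The "secure_api" connection is expected to come from a secrets backend rather than the metastore, per the DAG id. A hedged configuration sketch, assuming the Vault backend from apache-airflow-providers-hashicorp (the Vault URL and mount path are assumptions):

import os

# equivalent to the [secrets] section of airflow.cfg; must be set in the
# environment of the scheduler and workers before they start
os.environ['AIRFLOW__SECRETS__BACKEND'] = (
    'airflow.providers.hashicorp.secrets.vault.VaultBackend'
)
os.environ['AIRFLOW__SECRETS__BACKEND_KWARGS'] = (
    '{"connections_path": "connections", "url": "http://127.0.0.1:8200"}'
)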
Example #13
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('example_http_operator', default_args=default_args, tags=['example'])

dag.doc_md = __doc__

# t1, t2 and t5 are examples of tasks created by instantiating operators
t1 = SimpleHttpOperator(
    task_id='post_op',
    endpoint='post',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    response_check=lambda response: response.json()['json']['priority'] == 5,
    dag=dag,
)

t5 = SimpleHttpOperator(
    task_id='post_op_formenc',
    endpoint='post',
    data="name=Joe",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)

t2 = SimpleHttpOperator(
    task_id='get_op',
    method='GET',
Example #14
"""

with DAG(
        "simplehttp01",
        schedule_interval="@once",
        start_date=days_ago(0),
        default_args={},
        tags=["http"],
        doc_md=_dag_doc_md,
) as dag:
    dum1 = DummyOperator(task_id="http1")

    rest_call_ex01 = SimpleHttpOperator(
        task_id="rest-call-ex01",
        http_conn_id="http-disease.sh",
        endpoint="/v3/covid-19/nyt/states",
        method="GET",
        log_response=True,
    )

    @task(multiple_outputs=True)
    def transform() -> Sequence:
        """
        #### Transform task
        A simple Transform task which takes in the collection of order data and
        computes the total order value.
        """
        context = get_current_context()
        ti = context["ti"]
        data: Sequence = ti.xcom_pull(task_ids="rest-call-ex01",
                                      key="return_value")
Example #15
from utils import load_df_into_db

with DAG(
        dag_id='paypal_daily',
        start_date=datetime(2019, 1, 1),
        schedule_interval='@daily',
        tags=['transactions', 'rest-api', 'private', 'elt']
) as dag:
    task_extract_transactions = SimpleHttpOperator(
        endpoint='/v1/reporting/transactions',
        method='GET',
        data={
            'start_date': '{{ execution_date.isoformat() }}',
            'end_date': '{{ next_execution_date.isoformat() }}'
        },
        headers={
            'Content-Type': 'application/json'
        },
        http_conn_id='http_paypal',
        task_id='extract_transactions'
    )


    def load_transactions(data: str) -> None:
        # Read data from xcom
        paypal_transactions = data
        # Load data into db
        load_df_into_db(
            data_frame=pd.DataFrame({
                'VALUE': [paypal_transactions],
Example #16
    user_map = {
        'firstname': user['name']['first'],
        'lastname': user['name']['last']
    }
    processed_user = json.dumps(user_map)
    Variable.set("user", processed_user)


with DAG('user_data_processing',
         schedule_interval='@daily',
         default_args=default_args,
         catchup=False) as dag:
    is_api_available = HttpSensor(task_id='is_api_available',
                                  http_conn_id='user_api',
                                  endpoint='api/')

    fetch_user = SimpleHttpOperator(task_id='fetch_user',
                                    http_conn_id='user_api',
                                    endpoint='api/',
                                    method='GET')

    processing_user = PythonOperator(task_id='processing_user',
                                     python_callable=_processing_user)

    print_user = BashOperator(
        task_id='log_user',
        bash_command='echo "{{ var.value.user }}  {{ params.customer_key }}"',
        params={"customer_key": "value"})

    is_api_available >> fetch_user >> processing_user >> print_user
Example #17
        );
    """,
)

check_api_available = HttpSensor(
    dag=dag,
    task_id="check_api_available",
    http_conn_id=_API_NAME,
    endpoint="api/",
)

extract_user = SimpleHttpOperator(
    dag=dag,
    task_id="extract_user",
    http_conn_id=_API_NAME,
    endpoint="api/",
    method="GET",
    response_filter=lambda response: json.loads(response.text),
    log_response=True,
)

process_user = PythonOperator(
    dag=dag,
    task_id="process_user",
    python_callable=_process_user,
)

store_user = BashOperator(
    dag=dag,
    task_id="store_user",
    bash_command=f"""
Example #18
import os
import datetime as dt

import requests
from airflow import DAG
from airflow.decorators import task
from airflow.providers.http.operators.http import SimpleHttpOperator

with DAG(dag_id='titanic_dag',
         start_date=dt.datetime(2021, 3, 1),
         schedule_interval='@once') as dag:

    check_if_file_exists = SimpleHttpOperator(
        method='HEAD',
        task_id='check_file_existence',
        http_conn_id='web_stanford_http_id',
        endpoint='/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv',
    )

    @task
    def download_titanic_dataset():
        url = 'https://web.stanford.edu/class/archive/cs/cs109/cs109.1166/stuff/titanic.csv'
        response = requests.get(url, stream=True)
        response.raise_for_status()
        filepath = os.path.join(os.path.expanduser('~'), 'titanic.csv')
        with open(filepath, 'w', encoding='utf-8') as f:
            for line in response.iter_lines():
                f.write('{}\n'.format(line.decode('utf-8')))
        return filepath

    @task
Example #19
    task_id='remove_temp_file',
    bash_command=removetempfile,
    dag=dag,
)
## change to suit your setting
t_analytics = LocalFilesystemToGCSOperator(task_id="uploadtostorage",
                                           src=destination_file,
                                           dst=gcsdir,
                                           bucket=GCS_BUCKET,
                                           gcp_conn_id=GCS_CONN_ID,
                                           dag=dag)
## change to suit your setting
t_sendresult = SimpleHttpOperator(task_id='sendnotification',
                                  method='POST',
                                  http_conn_id='notificationserver',
                                  endpoint='api/logUpdate',
                                  data=json.dumps({"source_file":
                                                   source_file}),
                                  headers={"Content-Type": "application/json"},
                                  dag=dag)
# dependencies among tasks
t_downloadlogtocloud >> t_analytics
t_analytics >> fork
fork >> t_sendresult
fork >> t_removefile
t_removefile >> join
t_sendresult >> join
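fork and join are defined earlier in the file and not shown in this excerpt; presumably simple placeholder tasks. A minimal sketch (import path per Airflow 2.x; these definitions are assumptions):

from airflow.operators.dummy import DummyOperator

# hypothetical definitions for the fork/join placeholders used above
fork = DummyOperator(task_id='fork', dag=dag)
join = DummyOperator(task_id='join', dag=dag)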