Example #1
    def test_logging_head_error_request(self, mock_session_send):
        def resp_check(_):
            return True

        response = requests.Response()
        response.status_code = 404
        response.reason = 'Not Found'
        response._content = b"This endpoint doesn't exist"
        mock_session_send.return_value = response

        task = HttpSensor(dag=self.dag,
                          task_id='http_sensor_head_method',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          method='HEAD',
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)

        with mock.patch.object(task.hook.log, 'error') as mock_errors:
            with self.assertRaises(AirflowSensorTimeout):
                task.execute(None)

            self.assertTrue(mock_errors.called)
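            # With timeout=5 and poke_interval=1 the sensor pokes six times
            # before AirflowSensorTimeout, logging both messages on each poke.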
            calls = [
                mock.call('HTTP error: %s', 'Not Found'),
                mock.call("This endpoint doesn't exist"),
            ] * 6
            mock_errors.assert_has_calls(calls)
Example #2
    def test_poke_exception(self, mock_session_send):
        """
        An exception raised in the poke function should not be ignored.
        """
        response = requests.Response()
        response.status_code = 200
        mock_session_send.return_value = response

        def resp_check(_):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(
            task_id='http_sensor_poke_exception',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            response_check=resp_check,
            timeout=5,
            poke_interval=1,
        )
        with self.assertRaisesRegex(AirflowException, 'AirflowException raised here!'):
            task.execute(context={})
Example #3
    def test_poke_context(self, mock_session_send):
        response = requests.Response()
        response.status_code = 200
        mock_session_send.return_value = response

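        # Extra parameters in response_check's signature (here execution_date)
        # are filled in from the task context when the sensor pokes.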
        def resp_check(_, execution_date):
            if execution_date == DEFAULT_DATE:
                return True
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(
            task_id='http_sensor_poke_exception',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            response_check=resp_check,
            timeout=5,
            poke_interval=1,
            dag=self.dag,
        )

        task_instance = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        task.execute(task_instance.get_template_context())
Example #4
    def test_head_method(self, mock_session_send):
        def resp_check(_):
            return True

        task = HttpSensor(dag=self.dag,
                          task_id='http_sensor_head_method',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          method='HEAD',
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)

        task.execute(context={})

        args, kwargs = mock_session_send.call_args
        received_request = args[0]

        prep_request = requests.Request('HEAD', 'https://www.httpbin.org',
                                        {}).prepare()

        self.assertEqual(prep_request.url, received_request.url)
        self.assertEqual(prep_request.method, received_request.method)
Example #5
                outdata['rates'][pair] = indata['rates'][pair]
            with open('/opt/airflow/files/forex_rates.json', 'a') as outfile:
                json.dump(outdata, outfile)
                outfile.write('\n')


with DAG("forex_data_pipeline",
         start_date=datetime(2021, 1, 1),
         schedule_interval="@daily",
         default_args=default_args,
         catchup=False) as dag:

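    # Poke the API every 5 seconds; fail the task if "rates" has not shown up
    # in the response body within 20 seconds.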
    is_forex_rates_available = HttpSensor(
        task_id="is_forex_rates_available",
        http_conn_id="forex_api",
        endpoint="marclamberti/f45f872dea4dfd3eaa015a4a1af4b39b",
        response_check=lambda response: "rates" in response.text,
        poke_interval=5,
        timeout=20)

    is_forex_currencies_file_available = FileSensor(
        task_id="is_forex_currencies_file_available",
        fs_conn_id="forex_path",
        filepath="forex_currencies.csv",
        poke_interval=5,
        timeout=20)

    downloading_rates = PythonOperator(task_id="downloading_rates",
                                       python_callable=download_rates)

    is_forex_rates_available >> is_forex_currencies_file_available >> downloading_rates
Example #6
from airflow import DAG
from airflow.providers.http.sensors.http import HttpSensor
from airflow.operators.bash import BashOperator
from datetime import timedelta
from airflow.utils.dates import days_ago

with DAG(dag_id="SensorExample",
         start_date=days_ago(1),
         schedule_interval="@daily",
         catchup=False) as dag:
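    # No response_check is given, so the sensor succeeds as soon as the
    # endpoint responds without an HTTP error.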
    sensor = HttpSensor(task_id="httpsensor",
                        endpoint="/",
                        http_conn_id="http_conn",
                        retries=5,
                        retry_delay=timedelta(seconds=3))
    task1 = BashOperator(task_id="task1", bash_command="echo hello task1")
    task2 = BashOperator(task_id="task2", bash_command="echo hello task2")

    sensor >> [task1, task2]
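
Every example resolves its http_conn_id to a base URL through an Airflow Connection. As a minimal sketch (assuming Airflow's standard AIRFLOW_CONN_<CONN_ID> environment-variable convention; not part of the original example), the "http_conn" id above could be supplied without touching the metadata database like this:

import os

# Hypothetical setup for the "http_conn" id used above: Airflow parses the
# AIRFLOW_CONN_HTTP_CONN value as a connection URI, so the sensor's request
# is sent to http://www.example.com/.
os.environ["AIRFLOW_CONN_HTTP_CONN"] = "http://www.example.com"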
Example #7
            pg_hook.run(insert_statement, parameters=row)

            os.remove(tot_name)
        else:
            print("No file named {}".format(tot_name))
    else:
        print("No file named {}.  No data to load.".format(tot_name))


with DAG('fetch_kc_crime_data-v0.1',
         schedule_interval='@daily',
         default_args=default_args,
         catchup=False) as dag:

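    # No poke_interval or timeout is set, so the BaseSensorOperator defaults
    # apply (poke every 60 seconds, time out after 7 days).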
    check_endpoint_availability = HttpSensor(
        task_id='check_endpoint_availability',
        http_conn_id='http_data_kcmo_org',
        endpoint=app_config['endpoint'])

    download_latest_crime_data = PythonOperator(
        task_id='download_latest_crime_data',
        python_callable=download_latest_crime_data)

    load_data = PythonOperator(task_id='load_data_raw',
                               python_callable=load_data_raw)

    fetch_crime_window = PythonOperator(task_id='fetch_crime_window',
                                        python_callable=fetch_crime_window)

    check_endpoint_availability >> fetch_crime_window >> download_latest_crime_data >> load_data
with DAG("covid19_data_processing",
         schedule_interval="@daily",
         default_args=default_args,
         start_date=timezone.datetime(2021, 3, 1),
         tags=["covid19", "odds"]) as dag:

    start = DummyOperator(task_id="start")

    print_prev_ds = BashOperator(
        task_id="print_prev_ds",
        bash_command="echo {{ prev_ds }}",
    )

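    # http_conn_id defaults to "http_default"; the check passes once the
    # endpoint returns a non-empty JSON payload.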
    check_api = HttpSensor(
        task_id="check_api",
        endpoint="world",
        response_check=lambda response: len(response.json()) > 0,
    )

    download_covid19_data = PythonOperator(
        task_id="download_covid19_data",
        python_callable=_download_covid19_data,
    )

    create_table = SqliteOperator(task_id="create_db",
                                  sqlite_conn_id="sqlite_default",
                                  sql="""
            CREATE TABLE IF NOT EXISTS covid19 (
                NewConfirmed TEXT NOT NULL
            );
        """)
Example #9
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='put',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)

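# Gate the chain on httpbin being reachable: re-poke the root endpoint every
# 5 seconds until "httpbin" appears in the response body.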
sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    dag=dag,
)

sensor >> t1 >> t2 >> t3 >> t4 >> t5
Example #10
default_args = {
    'owner': 'airflow',
}


with DAG(
    start_date=dt.datetime(2021, 1, 1),
    dag_id='nyc_taxi_2021_dag',
    schedule_interval='@monthly',
    default_args=default_args,
) as dag:

    check_if_exists = HttpSensor(
        method='HEAD',
        endpoint='yellow_tripdata_{{ execution_date.strftime("%Y-%m") }}.csv',
        http_conn_id='nyc_yellow_taxi_id',
        task_id='check_if_exists',
        poke_interval=60 * 60 * 24,  # every 24 hours
        mode='reschedule',
    )

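    # TaskFlow API: each @task-decorated function becomes an operator, and
    # get_current_context() exposes the runtime context inside it.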
    @task
    def download_file():
        context = get_current_context()
        return download_dataset(context['execution_date'].strftime('%Y-%m'))

    @task
    def to_parquet(file_path: str):
        context = get_current_context()
        return convert_to_parquet(context['execution_date'].strftime('%Y-%m'), file_path)

    file_path = download_file()
Example #11
    df["date"] = today_date
    df = df[["city", "temperature", "humidity", "date"]]
    df = df.sort_values("city").reset_index(drop=True)
    df.to_csv("ukraine_weather_report_{}.csv".format(today_date), index=False)
    df = pandas.read_csv("ukraine_weather_report_{}.csv".format(today_date))


with DAG(dag_id="weather_data_pipeline",
         schedule_interval="0 12 * * *",
         default_args=default_args,
         catchup=False) as dag:

    is_weather_api_available = HttpSensor(
        task_id="is_weather_api_available",
        method="GET",
        http_conn_id="weather_api_conn",
        endpoint="current?access_key={}&query=Kiev".format(key),
        response_check=lambda response: "request" in response.json(),
        poke_interval=5,
        timeout=20)

    _create_report = PythonOperator(task_id="create_report",
                                    python_callable=create_report,
                                    provide_context=True,
                                    dag=dag)

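    # Fan out: one weather-fetching task per city, created in the loop below.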
    for query in city_queries:
        task_id = "get_weather_{}".format(city_queries[query])

        _get_weather = PythonOperator(task_id=task_id,
                                      op_kwargs={"query": query},
                                      python_callable=get_weather,
Example #12
with DAG(
        'covid19_data_processing',
        schedule_interval='@daily',
        default_args=default_args,  # Don't forget to add default_args here
        start_date=timezone.datetime(2021, 3, 1),
        tags=['covid19', 'ODDS']) as dag:

    # Defining Operator task
    start = DummyOperator(task_id='start')

    print_prev_ds = BashOperator(
        task_id='print_prev_ds',
        bash_command='echo {{ prev_ds }} {{ macros.ds_add("2015-01-01", 5) }}')

    check_api = HttpSensor(
        task_id='check_api',
        endpoint='/world',
        response_check=lambda response: response.status_code == 200)

    download_covid19_data = PythonOperator(
        task_id='download_covid19_data',
        python_callable=_download_covid19_data)

    create_table = SqliteOperator(task_id='create_table',
                                  sqlite_conn_id='sqlite_default',
                                  sql='''
            CREATE TABLE IF NOT EXISTS covid19 (
                NewConfirmed TEXT NOT NULL
            );
        ''')

    load_data_to_db = BashOperator(task_id='load_data_to_db',
Example #13
           "customers --hive-import --create-hive-table --hive-table airflow.customers "
    return f"{cmd1} && {cmd2}"


with DAG(
        dag_id="Customer_360_pipeline",
        start_date=days_ago(1),
        schedule_interval="@daily",
        catchup=False,
        tags=["customer_360", "aws"]

) as dag:
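    # Poll orders.csv over HTTP (an S3 object URL) until it returns 200;
    # on errors the task retries up to 10 times, 10 seconds apart.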
    aws_sensor = HttpSensor(
        task_id="watch_for_order_s3",
        endpoint="orders.csv",
        http_conn_id="orders_s3",
        retries=10,
        response_check=lambda response: response.status_code == 200,
        retry_delay=timedelta(seconds=10)
    )

    ssh_edge_download_task = SSHOperator(
        task_id="download_orders",
        ssh_conn_id="cloudera",
        command=download_order_command,
    )
    import_customers_info = SSHOperator(
        task_id="import_customers_from_sql",
        ssh_conn_id="cloudera",
        command=load_customer_info_cmd()
    )
Example #14
         catchup=False) as dag:
    creating_table = SqliteOperator(task_id='creating_table',
                                    sqlite_conn_id='db_sqlite',
                                    sql='''
          CREATE TABLE IF NOT EXISTS users(
          firstname TEXT NOT NULL,
          lastname TEXT NOT NULL,
          country TEXT NOT NULL,
          username TEXT NOT NULL,
          password TEXT NOT NULL,
          email TEXT NOT NULL PRIMARY KEY
          );
          ''')

    is_api_available = HttpSensor(task_id='is_api_available',
                                  http_conn_id='user_api',
                                  endpoint='api/')

    extracting_user = SimpleHttpOperator(
        task_id='extracting_user',
        http_conn_id='user_api',
        endpoint='api/',
        method='GET',
        response_filter=lambda response: json.loads(response.text),
        log_response=True)

    processing_user = PythonOperator(
        task_id='processing_user',
        python_callable=_processing_user,
    )
Example #15
create_table = SqliteOperator(
    dag=dag,
    task_id="create_table",
    sqlite_conn_id="db_sqlite",
    sql=f"""
        CREATE TABLE IF NOT EXISTS {_TABLE_NAME} (
            name TEXT NOT NULL,
            country TEXT NOT NULL
        );
    """,
)

check_api_available = HttpSensor(
    dag=dag,
    task_id="check_api_available",
    http_conn_id=_API_NAME,
    endpoint="api/",
)

extract_user = SimpleHttpOperator(
    dag=dag,
    task_id="extract_user",
    http_conn_id=_API_NAME,
    endpoint="api/",
    method="GET",
    response_filter=lambda response: json.loads(response.text),
    log_response=True,
)

process_user = PythonOperator(
    dag=dag,
Example #16
    creating_table = SqliteOperator(task_id="creating_table",
                                    sqlite_conn_id="db_sqlite",
                                    sql="""
            CREATE TABLE IF NOT EXISTS users(
                firstname TEXT NOT NULL,
                lastname TEXT NOT NULL,
                country TEXT NOT NULL,
                username TEXT NOT NULL,
                password TEXT NOT NULL,
                email TEXT NOT NULL PRIMARY KEY
            );
        """)

    is_api_available = HttpSensor(task_id="is_api_available",
                                  http_conn_id="user_api",
                                  endpoint="api/")

    extracting_user = SimpleHttpOperator(
        task_id="extracting_user",
        http_conn_id="user_api",
        endpoint="api/",
        method="GET",
        response_filter=lambda response: json.loads(response.text),
        log_response=False)

    processing_user = PythonOperator(task_id="processing_user",
                                     python_callable=_processing_user)

    storing_user = BashOperator(
        task_id="storing_user",
Example #17
    creating_table = SqliteOperator(task_id='creating_table',
                                    sqlite_conn_id='db_sqlite',
                                    sql='''
            CREATE TABLE IF NOT EXISTS users (
                firstname TEXT NOT NULL,
                lastname TEXT NOT NULL,
                country TEXT NOT NULL,
                username TEXT NOT NULL,
                password TEXT NOT NULL,
                email TEXT NOT NULL PRIMARY KEY
            );
        ''')

    is_api_available = HttpSensor(task_id='is_api_available',
                                  http_conn_id='user_api',
                                  endpoint='api/')

    extracting_user = SimpleHttpOperator(
        task_id='extracting_user',
        http_conn_id='user_api',
        endpoint='api/',
        method='GET',
        response_filter=lambda response: json.loads(response.text),
        log_response=True)

    processing_user = PythonOperator(task_id='processing_user',
                                     python_callable=_processing_user)

    storing_user = BashOperator(
        task_id='storing_user',
Example #18
#     task_id='fail_task',
#     bash_command='exit 1',
#     on_failure_callback=slack_failed_task,
#     provide_context=True
# )

with DAG(dag_id='forex_data_pipeline',
         default_args=default_args,
         schedule_interval='@daily',
         catchup=False) as dag:

    is_forex_rates_available = HttpSensor(
        task_id='is_forex_rates_available',
        method='GET',
        http_conn_id='forex_api',
        endpoint='latest',
        response_check=lambda response: 'rates' in response.text,
        poke_interval=5,
        timeout=20
    )

    is_forex_currencies_file_available = FileSensor(
        task_id='is_forex_currencies_file_available',
        fs_conn_id='forex_path',
        filepath='forex_currencies.csv',
        poke_interval=5,
        timeout=20
    )

    downloading_rates = PythonOperator(
        task_id='downloading_rates',