Example 1
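    # Note: this excerpt (and the similar test excerpts below) assumes an enclosing
    # unittest.TestCase in which requests.Session.send is patched, e.g. with a
    # @mock.patch decorator that is not shown here, so that mock_session_send is
    # injected into the test method.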
    def test_logging_head_error_request(
        self,
        mock_session_send
    ):
        def resp_check(resp):
            return True

        response = requests.Response()
        response.status_code = 404
        response.reason = 'Not Found'
        mock_session_send.return_value = response

        task = HttpSensor(
            dag=self.dag,
            task_id='http_sensor_head_method',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            method='HEAD',
            response_check=resp_check,
            timeout=5,
            poke_interval=1
        )

        with mock.patch.object(task.hook.log, 'error') as mock_errors:
            with self.assertRaises(AirflowSensorTimeout):
                task.execute(None)

            self.assertTrue(mock_errors.called)
            mock_errors.assert_called_with('HTTP error: %s', 'Not Found')
Example 2
    def test_logging_head_error_request(self, mock_session_send):
        def resp_check(resp):
            return True

        response = requests.Response()
        response.status_code = 404
        response.reason = 'Not Found'
        mock_session_send.return_value = response

        task = HttpSensor(dag=self.dag,
                          task_id='http_sensor_head_method',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          method='HEAD',
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)

        with mock.patch.object(task.hook.log, 'error') as mock_errors:
            with self.assertRaises(AirflowSensorTimeout):
                task.execute(None)

            self.assertTrue(mock_errors.called)
            mock_errors.assert_called_with('HTTP error: %s', 'Not Found')
Example 3
    def test_head_method(self, mock_session_send):
        def resp_check(resp):
            return True

        task = HttpSensor(
            dag=self.dag,
            task_id='http_sensor_head_method',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            method='HEAD',
            response_check=resp_check,
            timeout=5,
            poke_interval=1)

        task.execute(None)

        args, kwargs = mock_session_send.call_args
        received_request = args[0]

        prep_request = requests.Request(
            'HEAD',
            'https://www.google.com',
            {}).prepare()

        self.assertEqual(prep_request.url, received_request.url)
        self.assertEqual(prep_request.method, received_request.method)
Example 4
    def test_sensor(self):
        sensor = HttpSensor(
            task_id='http_sensor_check',
            http_conn_id='http_default',
            endpoint='/search',
            request_params={"client": "ubuntu", "q": "airflow", 'date': '{{ds}}'},
            headers={},
            response_check=lambda response: (
                "airbnb/airflow/" + DEFAULT_DATE.strftime('%Y-%m-%d')
                in response.text),
            poke_interval=5,
            timeout=15,
            dag=self.dag)
        sensor.run(start_date=DEFAULT_DATE, end_date=DEFAULT_DATE, ignore_ti_state=True)
Example 5
    def test_poke_exception(self, mock_session_send):
        """
        An exception raised in the poke function should not be ignored.
        """
        response = requests.Response()
        response.status_code = 200
        mock_session_send.return_value = response

        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(task_id='http_sensor_poke_exception',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          response_check=resp_check,
                          timeout=5,
                          poke_interval=1)
        with self.assertRaisesRegexp(AirflowException,
                                     'AirflowException raised here!'):
            task.execute(None)
Example 6
    def test_poke_exception(self, mock_session_send):
        """
        Exception occurs in poke function should not be ignored.
        """
        response = requests.Response()
        response.status_code = 200
        mock_session_send.return_value = response

        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(
            task_id='http_sensor_poke_exception',
            http_conn_id='http_default',
            endpoint='',
            request_params={},
            response_check=resp_check,
            timeout=5,
            poke_interval=1)
        with self.assertRaisesRegexp(AirflowException, 'AirflowException raised here!'):
            task.execute(None)
Example 7
              'r') as readfile:
        reader = csv.DictReader(readfile, delimiter=',')

        for row in reader:
            print(row)


with DAG(dag_id="willshire_dag",
         schedule_interval="@daily",
         default_args=default_args,
         catchup=False) as dag:
    ## WILLSHIRE RATES
    is_willshire_5000_available = HttpSensor(
        task_id="is_willshire_5000_available",
        method="GET",
        http_conn_id="willshire_api",
        endpoint='latest',
        response_check=lambda response: "DATE" in response.text,
        poke_interval=5,
        timeout=20)

    downloading_willshire_rates = PythonOperator(
        task_id="downloading_willshire_rates",
        python_callable=download_willshire_rates)

    is_willshire_file_available = FileSensor(
        task_id="is_willshire_file_available",
        fs_conn_id="willshire_path",
        filepath="willshire.csv",
        poke_interval=5,
        timeout=20)
Example 8
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=5)
}

slack_token = BaseHook.get_connection("slack_conn").password

with DAG(dag_id="pycon_dag", schedule_interval="*/5 * * * *",
         default_args=default_args, catchup=False) as dag:
    esta_todo_bien = HttpSensor(
        task_id="ping_webpage",
        method="GET",
        http_conn_id="pyconar",
        endpoint="events/pyconar2020/",
        response_check=lambda response: 200 == response.status_code,
        poke_interval=5,
        timeout=20
    )

    sending_slack_notification = SlackWebhookOperator(
        task_id='sending_slack',
        http_conn_id='slack_conn',
        webhook_token=slack_token,
        message="Esta todo bien! \n Ahora toma un gatito! "
                "https://www.youtube.com/watch?v=J---aiyznGQ",
        username='******',
        icon_url='https://raw.githubusercontent.com/apache/'
                 'airflow/master/airflow/www/static/pin_100.png',
        dag=dag
Example 9
                      'a') as outfile:
                json.dump(outdata, outfile)
                outfile.write('\n')


with DAG(
        dag_id="forex_data_pipeline",
        schedule_interval="@daily",
        default_args=default_args,
        catchup=False,
) as dag:

    is_forex_rates_available = HttpSensor(
        task_id="is_forex_rates_available",
        http_conn_id="forex_api",
        endpoint="marclamberti/f45f872dea4dfd3eaa015a4a1af4b39b",
        response_check=lambda response: "rates" in response.text,
        poke_interval=5,
        timeout=20,
    )

    is_forex_currencies_file_available = FileSensor(
        task_id="is_forex_currencies_file_available",
        fs_conn_id="forex_path",
        filepath="forex_currencies.csv",
        poke_interval=5,
        timeout=20,
    )

    downloading_rates = PythonOperator(task_id="downloading_rates",
                                       python_callable=download_rates)
Example 10
DAG_NAME = 'HTTP_OPERATOR_TEST'
args = {'owner': 'airflow', 'start_date': airflow.utils.dates.days_ago(10)}

dag = DAG(
    dag_id=DAG_NAME,
    catchup=False,
    default_args=args,
    schedule_interval='3 12 * * *',
)

start_task = DummyOperator(task_id='starting_task', dag=dag)

http_sensor_task = HttpSensor(task_id='http_sensor_task',
                              http_conn_id='https_default',
                              method='GET',
                              endpoint='dog.ceo/api/breed/hound/images',
                              headers={"Content-Type": "application/json"},
                              xcom_push=True,
                              dag=dag)
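# note: no response_check is given here; in the Airflow versions contemporary with
# this example, the sensor then succeeds as soon as the HTTP request itself succeeds
# (a 404 keeps the sensor poking rather than failing the task)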

t1 = SimpleHttpOperator(task_id='get_labrador',
                        method='GET',
                        http_conn_id='https_default',
                        endpoint='dog.ceo/api/breed/hound/images',
                        headers={"Content-Type": "application/json"},
                        xcom_push=True,
                        dag=dag)

http_sensor_task >> start_task >> t1
Example 11
    hook = S3_hook.S3Hook('s3_connection')
    hook.load_file(filename, key, bucket_name)

# Initiate our dag definition
dag = DAG(
    dag_id="us_covid_daily_report_pipeline",
    start_date=datetime(2020, 12, 1),
    schedule_interval="@daily"
)

# Check if Covid api has data for a given date
is_covid_api_available = HttpSensor(
    task_id="is_covid_api_available",
    method="GET",
    http_conn_id="covid_api",
    endpoint="v1/us/{{ ds_nodash }}.json",
    response_check=lambda response: "date" in response.text,
    poke_interval=5,
    timeout=20
)

# Download Covid US states data for a given date
fetch_covid_us_data = BashOperator(
    task_id="fetch_covid_us_data",
    bash_command="curl -o /opt/airflow/data/{{ ds_nodash }}.json \
        --request GET \
	    --url https://api.covidtracking.com/v1/us/{{ ds_nodash }}.json",
    dag=dag
)

# Upload Covid data to S3 using a helper function
Example 12
        message=slack_msg,
        username='******')
    return slack_send_message.execute(context=context)


with DAG(
        dag_id='get_gh_archive_2',
        default_args=default_args,
        schedule_interval='0 * * * *',
        catchup=True,
        max_active_runs=1,
) as dag:
    sensor = HttpSensor(
        task_id='check_if_present',
        http_conn_id=GH_ARCHIVE_CONN_ID,
        endpoint='{{execution_date.strftime("%Y-%m-%d-%-H")}}.json.gz',
        method='HEAD',
        poke_interval=5,
        dag=dag,
    )

    get_gh_archive = DownloadGZIPOperator(
        task_id='download',
        http_conn_id=GH_ARCHIVE_CONN_ID,
        endpoint='{{execution_date.strftime("%Y-%m-%d-%-H")}}.json.gz',
        method='GET',
        storage_location=
        'day_utc={{ ds }}/{{ execution_date.strftime("%Y-%m-%d-%-H") }}.json.gz',
        headers={'Accept-Encoding': 'deflate'},
    )

    unzip = BashOperator(
Example 13
def check_http(response):
    content = response.text
    if len(content) > 0:
        log.info('the server responded with HTTP content - %s' % content)
        return True

    log.info('the server did not respond')
    return False


sensor = HttpSensor(
    task_id='http_sensor',
    # 'http_default' goes to https://google.com
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=check_http,
    poke_interval=5,
    dag=dag,
)

handler = PythonOperator(
    task_id='python_operator',
    provide_context=True,
    python_callable=handler,
    dag=dag
)


sensor >> handler
# we use the "with" python keyword to ensure that the DAG object is well closed once we are done using it
with DAG(dag_id="forex_data_pipeline",
         schedule_interval="@daily",
         default_args=default_args,
    # catchup=False prevents Airflow from backfilling past DAG runs
         catchup=False) as dag:

    # check whether the rates at the endpoint are available using the HttpSensor
    is_forex_rates_available = HttpSensor(
        task_id="is_forex_rates_available",
        method="GET",
        # http_conn_id names the Airflow connection to hit; we use forex_api because
        # we will create a connection with that name in the Airflow UI
        # (http://localhost:8080/admin/connection/)
        http_conn_id="forex_api",
        endpoint="latest",
        # response_check is a callable that must return True when the response is the
        # one we expect; this lambda returns True if the field "rates" appears in
        # response.text
        response_check=lambda response: "rates" in response.text,
        # the sensor sends an HTTP request every 5 seconds...
        poke_interval=5,
        # ...for at most 20 seconds before timing out
        timeout=20
    )
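    # the "forex_api" connection is assumed to already exist in Airflow; besides the
    # UI, it can also be supplied through an environment variable using Airflow's
    # AIRFLOW_CONN_<CONN_ID> URI convention, for example (hypothetical host):
    #   export AIRFLOW_CONN_FOREX_API=http://api.exchangeratesapi.io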

    # check if the currencies pair file is available in the directory
    is_forex_currencies_file_available = FileSensor(
        task_id="is_forex_currencies_file_available",
        fs_conn_id="forex_path",
        filepath="forex_currencies.csv",
        poke_interval=5,
        timeout=20
Example 15
def throw_task(dag, init, code_path, name='', debug=False):
    if name:
        name = '-' + name
    with open(code_path, 'r') as f:
        code = f.read()

    spark_session = SparkLivykHook(
        http_conn_id=CONNECTION,
        task_id='start-session' + name,
        data=json.dumps({'kind': 'spark'}),
        headers={'Content-Type': 'application/json'},
        endpoint='sessions',
        dag=dag,
    )

    sensor = HttpSensor(
        task_id='wait_spark_ready' + name,
        http_conn_id=CONNECTION,
        endpoint="{{'/sessions/'+ti.xcom_pull(task_ids='start-session" + name +
        "')+'/state'}}",
        request_params={},
        response_check=lambda response: response.json()['state'] == 'idle',
        poke_interval=5,
        dag=dag,
    )

    code = SparkLivykHook(
        http_conn_id=CONNECTION,
        task_id='send-task' + name,
        data=json.dumps({'code': code}),
        headers={'Content-Type': 'application/json'},
        endpoint="{{'/sessions/'+ti.xcom_pull(task_ids='start-session" + name +
        "')+'/statements'}}",
        dag=dag,
    )

    end_task = HttpSensor(
        task_id='end-task' + name,
        http_conn_id=CONNECTION,
        endpoint="{{'/sessions/'+ti.xcom_pull(task_ids='start-session" + name +
        "')+'/statements/'+ti.xcom_pull(task_ids='send-task" + name + "')}}",
        request_params={},
        response_check=statment_status,
        poke_interval=5,
        dag=dag,
    )

    if not debug:
        close_task = SimpleHttpOperator(
            method='DELETE',
            task_id='close-task' + name,
            http_conn_id=CONNECTION,
            endpoint="{{'/sessions/'+ti.xcom_pull(task_ids='start-session" +
            name + "')}}",
            dag=dag,
        )

        init >> spark_session >> sensor >> code >> end_task >> close_task
        return close_task
    else:
        init >> spark_session >> sensor >> code >> end_task
        return end_task
Example 16
from airflow.sensors.http_sensor import HttpSensor

# Python libraries
from datetime import datetime

# Initiate our dag definition
dag = DAG(dag_id="nyc_collisions_pipeline",
          start_date=datetime(2020, 12, 19),
          schedule_interval="@daily")

# Check if collisions api has data for a given date
is_collisions_api_available = HttpSensor(
    task_id="is_collisions_api_available",
    method="GET",
    http_conn_id="nyc_collisions_api",
    endpoint="resource/h9gi-nx95.json?crash_date={{ ds }}",
    response_check=lambda response: "crash_date" in response.text,
    poke_interval=5,
    timeout=20,
    dag=dag)

# Download collisions data for a given date
fetch_collisions_data = BashOperator(
    task_id="fetch_collisions_data",
    bash_command="curl -o /usr/local/airflow/data/{{ ds }}.json \
        --request GET \
        --url https://data.cityofnewyork.us/resource/h9gi-nx95.json?crash_date={{ ds }}",
    dag=dag)

# Define the dependencies
is_collisions_api_available >> fetch_collisions_data
Example 17
    'provide_context': True
}

dag = DAG(
    dag_id='github_commits_loader',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    max_active_runs=1,
    catchup=False,
)

check_commits = HttpSensor(
    task_id='pull_commits',
    http_conn_id='',
    headers={'Accept': 'application/vnd.github.cloak-preview'},
    method='GET',
    endpoint=
    f'https://api.github.com/search/commits?q=committer:{GITHUB_USERNAME}&sort=committer-date',
    response_check=lambda response: response.json()['total_count'] > 0,
    dag=dag,
)

load_github_commits = HttpToGcsOperator(
    task_id='load_github_commits',
    http_conn_id='',
    headers={'Accept': 'application/vnd.github.cloak-preview'},
    method='GET',
    endpoint=
    f'https://api.github.com/search/commits?q=committer:{GITHUB_USERNAME}&sort=committer-date',
    bucket=GOOGLE_STORAGE_BUCKET,
    filename=OUTPUT_FILENAME,
    dag=dag,
    """
    Filter changes

    Return a list of items that matched by criteria ``entity`` and ``action``
    """

    for task in tasks:
        _entity = get_entity(task["id"])
        if _entity == entity and task.get("task") == action:
            yield task


http_kernel_check = HttpSensor(
    task_id='http_kernel_check',
    http_conn_id='kernel_conn',
    endpoint='/changes',
    request_params={},
    poke_interval=5,
    dag=dag,
)


read_changes_task = ShortCircuitOperator(
    task_id="read_changes_task",
    provide_context=True,
    python_callable=read_changes,
    dag=dag,
)


def transform_journal(data):
    metadata = data["metadata"]
Example 19
    @apply_defaults
    def __init__(self, name, *args, **kwargs):
        super().__init__(*args, **kwargs)
        self.name = name

    def execute(self, context):
        message = "Hello {}".format(self.name)
        print(message)
        return message


task1 = BashOperator(task_id='t1', bash_command='echo hello', dag=dag)
task2 = BashOperator(task_id='t2', bash_command='echo t2', dag=dag)
task3 = BashOperator(task_id='t3',
                     bash_command='echo t3',
                     dag=dag,
                     trigger_rule='all_failed')
task4 = MyOperator(name='Akshay',
                   task_id='t4',
                   dag=dag,
                   trigger_rule='one_success')

# Create connection id in UI.
sensor = HttpSensor(task_id='sensor',
                    endpoint='/',
                    http_conn_id='my_httpcon',
                    dag=dag,
                    retries=20,
                    retry_delay=timedelta(seconds=10))

sensor >> task1 >> [task2, task3] >> task4
Example 20
                outdata['rates'][pair] = indata['rates'][pair]
            with open('/usr/local/airflow/dags/files/forex_rates.json',
                      'a') as outfile:
                json.dump(outdata, outfile)
                outfile.write('\n')


with DAG(dag_id="forex_data_pipeline",
         schedule_interval="@daily",
         default_args=default_args,
         catchup=False) as dag:
    is_forex_rates_available = HttpSensor(
        task_id="is_forex_rates_available",
        method='GET',
        http_conn_id='forex_api',
        endpoint='latest',
        response_check=lambda response: "rates" in response.text,
        poke_interval=5,
        timeout=20,
    )

    is_forex_file_available = FileSensor(
        task_id="is_forex_file_available",
        fs_conn_id='forex_path',
        filepath="forex_currencies.csv",
        poke_interval=5,
        timeout=20,
    )

    downloading_rates = PythonOperator(
        task_id='forex_downloading_rates',
Example 21
    ac = AirflowController()
    ac.createHttpConnection("securethebox", "https://securethebox.us")


def t2_error_task(**context):
    instance = context['task_instance']
    print("Failed...", instance)


http_sensor = HttpSensor(
    task_id='http_sensor_task',
    http_conn_id='securethebox',
    endpoint='',
    method='GET',
    request_params=None,
    headers=None,
    response_check=False,
    extra_options=None,
    poke_interval=1,  # seconds; check the site every second
    timeout=30,  # give up after 30 seconds
    on_failure_callback=t2_error_task,
    dag=dag)


def printMessage(**context):
    xcomdata = context['task_instance'].xcom_pull(task_ids='http_sensor_task')
    print("print", xcomdata)


print_message = PythonOperator(task_id='print_message',
                               python_callable=printMessage,
Example 22
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='api/v1.0/nodes',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "Google" in response.text,
    poke_interval=5,
    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
Example 23
from airflow.operators.hive_operator import HiveOperator
from airflow.contrib.operators.spark_submit_operator import SparkSubmitOperator
from airflow.operators.email_operator import EmailOperator
from airflow.operators.slack_operator import SlackAPIPostOperator

import json
import csv
import requests

default_args = {
    "owner": 'test',
    "start_date": datetime(2021, 1, 1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=5)
}

with DAG(dag_id="test",
         schedule_interval="@hourly",
         default_args=default_args,
         catchup=True) as dag:
    collect_data = HttpSensor(
        task_id="collect_data",
        method="GET",
        http_conn_id="data_api",
        endpoint="latest",
        response_check=lambda response: "rates" in response.text,
        poke_interval=5,
        timeout=20)
    """
    Filter changes

    Return a list of items that matched by criteria ``entity`` and ``action``
    """

    for task in tasks:
        _entity = get_entity(task["id"])
        if _entity == entity and task.get("task") == action:
            yield task


http_kernel_check = HttpSensor(
    task_id="http_kernel_check",
    http_conn_id="kernel_conn",
    endpoint="/changes",
    request_params={},
    poke_interval=5,
    dag=dag,
)

read_changes_task = ShortCircuitOperator(
    task_id="read_changes_task",
    provide_context=True,
    python_callable=read_changes,
    dag=dag,
)


def JournalFactory(data):
    """Produz instância de `models.Journal` a partir dos dados retornados do
    endpoint `/journals/:journal_id` do Kernel.
Example 25
                outdata['rates'][pair] = indata['rates'][pair]
            with open('/usr/local/airflow/dags/files/forex_rates.json',
                      'a') as outfile:
                json.dump(outdata, outfile)
                outfile.write('\n')


with DAG(dag_id="forex_data_pipeline",
         schedule_interval="@daily",
         default_args=default_args,
         catchup=False) as dag:

    is_forex_rates_available = HttpSensor(
        task_id="is_forex_rates_available",
        method="GET",
        http_conn_id="forex_api",
        endpoint="latest",
        response_check=lambda response: "rates" in response.text,
        poke_interval=5,
        timeout=20)

    is_forex_currencies_file_available = FileSensor(
        task_id="is_forex_currencies_file_available",
        fs_conn_id="forex_path",
        filepath="forex_currencies.csv",
        poke_interval=5,
        timeout=20)

    downloading_rates = PythonOperator(task_id="downloading_rates",
                                       python_callable=download_rates)

    saving_rates = BashOperator(task_id="saving_rates",
Example 26
t3 = SimpleHttpOperator(
    task_id='put_op',
    method='PUT',
    endpoint='put',
    data=json.dumps({"priority": 5}),
    headers={"Content-Type": "application/json"},
    dag=dag,
)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='delete',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag,
)

sensor = HttpSensor(
    task_id='http_sensor_check',
    http_conn_id='http_default',
    endpoint='',
    request_params={},
    response_check=lambda response: "httpbin" in response.text,
    poke_interval=5,
    dag=dag,
)

sensor >> t1 >> t2 >> t3 >> t4 >> t5
Example 27
    def test_poke_context(self, mock_session_send):
        """
        Test that provide_context passes the template context to response_check.
        """
        response = requests.Response()
        response.status_code = 200
        mock_session_send.return_value = response

        def resp_check(resp, **context):
            if context:
                if "execution_date" in context:
                    if context["execution_date"] == DEFAULT_DATE:
                        return True

            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(task_id='http_sensor_poke_exception',
                          http_conn_id='http_default',
                          endpoint='',
                          request_params={},
                          response_check=resp_check,
                          provide_context=True,
                          timeout=5,
                          poke_interval=1,
                          dag=self.dag)

        task_instance = TaskInstance(task=task, execution_date=DEFAULT_DATE)
        task.execute(task_instance.get_template_context())
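        # in this example's HttpSensor variant, provide_context=True forwards the
        # task-instance template context (execution_date, ds, ...) to resp_check as
        # keyword arguments, which is what the execution_date check above relies on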