Example #1
    def test_poke_exception(self):
        """
        An exception raised in the poke function should not be ignored.
        """
        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(task_id='http_sensor_poke_exception',
                          http_conn_id='http_default',
                          endpoint='',
                          params={},
                          response_check=resp_check,
                          poke_interval=5)
        with self.assertRaisesRegex(AirflowException,
                                    'AirflowException raised here!'):
            task.execute(None)
Example #2
    def test_poke_exception(self):
        """
        An exception raised in the poke function should not be ignored.
        """
        def resp_check(resp):
            raise AirflowException('AirflowException raised here!')

        task = HttpSensor(
            task_id='http_sensor_poke_exception',
            http_conn_id='http_default',
            endpoint='',
            params={},
            response_check=resp_check,
            poke_interval=5)
        with self.assertRaisesRegex(AirflowException, 'AirflowException raised here!'):
            task.execute(None)
Example #3
t3 = SimpleHttpOperator(task_id='put_op',
                        method='PUT',
                        endpoint='api/v1.0/nodes',
                        data=json.dumps({"priority": 5}),
                        headers={"Content-Type": "application/json"},
                        dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(task_id='http_sensor_check',
                    http_conn_id='http_default',
                    endpoint='',
                    params={},
                    response_check=lambda response: "Google" in response.text,
                    poke_interval=5,
                    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
Example #4
                        "key": "country",
                        "value": "Germany"
                    }]
                }
            }
        }),
        headers=headers,
        xcom_push=True,
        dag=dag,
    )

    wait_for_dataprep_job_to_complete = HttpSensor(
        task_id='wait_for_dataprep_job_to_complete',
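        # templated endpoint: the jobGroup id comes from the XCom pushed by run_dataprep_job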
        endpoint='/v4/jobGroups/{{ json.loads(ti.xcom_pull(task_ids="run_dataprep_job"))["id"] }}'
                 '?embed=jobs.errorMessage',
        headers=headers,
        response_check=check_dataprep_run_complete,
        poke_interval=10,
        dag=dag,
    )

bigquery_run_sql = BigQueryOperator(
    task_id='bq_run_sql',
    use_legacy_sql=False,
    write_disposition='WRITE_TRUNCATE',
    allow_large_results=True,
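    # note: 'bql' was renamed to 'sql' in later versions of BigQueryOperator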
    bql='''
    #standardsql
    SELECT
      stories.score AS stories_score,
      COUNT(stories.id) AS stories_count
Example #5
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG('druid-ingest-covid', default_args=default_args)

dag.doc_md = __doc__

check_data = HttpSensor(
    task_id='covid-data-check',
    http_conn_id='http_default',
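    # templated endpoint: ds is reformatted into the dd-mm-YYYY CSV filename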
    endpoint='{{ macros.ds_format(ds, "%Y-%m-%d", "%d-%m-%Y") }}.csv',
    params={},
    response_check=lambda response: response.status_code == 200,
    poke_interval=5,
    dag=dag)


def post_task(ds):
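    """Load spec.json and point the Druid ingestion spec at the day's CSV URL."""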
    endpoint = macros.ds_format(ds, "%Y-%m-%d", "%d-%m-%Y") + '.csv'
    http_conn_host = 'https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/'
    url = http_conn_host + endpoint

    with open('spec.json') as f:
        druid_spec = json.load(f)

    druid_spec['spec']['ioConfig']['inputSource']['uris'] = [url]
Example #6
dag = DAG('mentions_data',
          default_args=default_args,
          schedule_interval='*/15 * * * *')

# t1, t2 and t3 are examples of tasks created by instantiating operators

t1 = PythonOperator(task_id='print_date',
                    provide_context=True,
                    python_callable=get_date,
                    dag=dag)

sensor = HttpSensor(
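    # HEAD avoids downloading the zip while polling for its existence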
    task_id='check_for_new_dump',
    http_conn_id='http_default',
    method='HEAD',
    poke_interval=5,
    timeout=15 * 60,
    endpoint="{{ ti.xcom_pull(task_ids='print_date') }}.mentions.CSV.zip",
    dag=dag)

t2 = BashOperator(
    task_id='producer',
    bash_command="python /usr/local/kafka/airflow_producer_mentions.py "
                 "{{ ti.xcom_pull(task_ids='print_date') }}",
    retries=3,
    dag=dag)

t3 = BashOperator(
    task_id='consumer',
    bash_command='python /usr/local/kafka/airflow_consumer_mentions.py',
    retries=3,
Example #7
def sensor_factory(sid, config, dagdict):
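    """Build a sensor of config['type'] for its DAG, filling missing keys from per-type defaults."""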
    default_paras = {
        'TimeSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'hour': 0,
            'minute': 0
        },
        'SqlSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'sql': '',
            'conn_id': ''
        },
        'HivePartitionSensor': {
            'poke_interval': 60 * 5,
            'timeout': 60 * 60 * 24,
            'table': '',
            'partition': '',
            'metastore_conn_id': '',
            'schema': 'default'
        },
        'HdfsSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'filepath': '',
            'hdfs_conn_id': 'hdfs_default'
        },
        'HttpSensor': {
            'poke_interval': 60,
            'timeout': 60 * 60 * 24,
            'endpoint': '',
            'http_conn_id': 'http_default',
            'params': None,
            'headers': None,
            'response_check': None
        }
    }
    c = default_paras[config['type']]
    c.update(config)
    sensor_type = c['type']
    dag = dagdict[c['dag_id']]
    if sensor_type == 'TimeSensor':
        target_time = time(c['hour'], c['minute'])
        return TimeSensor(target_time=target_time,
                          task_id=sid,
                          dag=dag,
                          poke_interval=c['poke_interval'],
                          timeout=c['timeout'])

    elif sensor_type == 'SqlSensor':
        return SqlSensor(sql=c['sql'],
                         conn_id=c['conn_id'],
                         task_id=sid,
                         dag=dag,
                         poke_interval=c['poke_interval'],
                         timeout=c['timeout'])

    elif sensor_type == 'HivePartitionSensor':
        return HivePartitionSensor(table=c['table'],
                                   partition=c['partition'],
                                   schema=c['schema'],
                                   metastore_conn_id=c['metastore_conn_id'],
                                   task_id=sid,
                                   dag=dag,
                                   poke_interval=c['poke_interval'],
                                   timeout=c['timeout'])

    elif sensor_type == 'HdfsSensor':
        return HdfsSensor(task_id=sid,
                          dag=dag,
                          filepath=c['filepath'],
                          hdfs_conn_id=c['hdfs_conn_id'],
                          poke_interval=c['poke_interval'],
                          timeout=c['timeout'])

    elif sensor_type == 'HttpSensor':
        # pass through the HTTP-specific defaults (params/headers/response_check)
        return HttpSensor(task_id=sid,
                          dag=dag,
                          endpoint=c['endpoint'],
                          http_conn_id=c['http_conn_id'],
                          params=c['params'],
                          headers=c['headers'],
                          response_check=c['response_check'],
                          poke_interval=c['poke_interval'],
                          timeout=c['timeout'])
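
# A minimal usage sketch (the DAG name and config values below are hypothetical):
#
#   config = {'type': 'HttpSensor', 'dag_id': 'my_dag', 'endpoint': 'health'}
#   sensor = sensor_factory('wait_for_api', config, {'my_dag': my_dag})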
Example #8

    params={},
    retries=1,
    dag=dag)

t1 = SSHExecuteOperator(
    task_id='verify_transfer_to_remote',
    ssh_hook=sssh_hook_01,
    bash_command=AIRFLOW_HOME + '/dags/echo_date.sh',
    params={},
    retries=1,
    dag=dag)

domain01_sensor = HttpSensor(
    task_id='domain01_sensor',
    endpoint='',
    http_conn_id='http_domain01',
    retries=1,
    params={},
    dag=dag)

domain02_sensor = HttpSensor(
    task_id='domain02_sensor',
    endpoint='',
    http_conn_id='http_domain02',
    retries=1,
    params={},
    dag=dag)

domain03_sensor = HttpSensor(
    task_id='domain03_sensor',
    endpoint='',
Example #9
t3 = SimpleHttpOperator(task_id='put_op',
                        method='PUT',
                        endpoint='api/v1.0/nodes',
                        data=json.dumps({"priority": 5}),
                        headers={"Content-Type": "application/json"},
                        dag=dag)

t4 = SimpleHttpOperator(
    task_id='del_op',
    method='DELETE',
    endpoint='api/v1.0/nodes',
    data="some=data",
    headers={"Content-Type": "application/x-www-form-urlencoded"},
    dag=dag)

sensor = HttpSensor(task_id='http_sensor_check',
                    http_conn_id='http_default',
                    endpoint='api/v1.0/apps',
                    params={},
                    headers={"Content-Type": "application/json"},
                    response_check=lambda response: "collation" in response.text,
                    poke_interval=5,
                    dag=dag)

t1.set_upstream(sensor)
t2.set_upstream(t1)
t3.set_upstream(t2)
t4.set_upstream(t3)
t5.set_upstream(t4)
Example #10
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': datetime(2018, 7, 1),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=1),
    # 'queue': 'bash_queue',
    # 'pool': 'backfill',
    # 'priority_weight': 10,
    # 'end_date': datetime(2016, 1, 1),
}

dag = DAG('lin1000_domain_checker',
          default_args=default_args,
          schedule_interval="* * * * *")

domain01_sensor = HttpSensor(task_id='lin1000_domain_sensor',
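                             # an empty endpoint polls the base URL of the HTTP connection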
                             endpoint='',
                             http_conn_id='lin1000_domain_http',
                             retries=1,
                             params={},
                             dag=dag)

dummy_operator = DummyOperator(
    task_id='dummy_task',
    dag=dag,
)

dummy_operator.set_upstream(domain01_sensor)