예제 #1
0
    def test_injected_hook(self):
        """A hook pre-assigned to the operator's cache must be the one get_hook() returns."""
        injected = LivyHook(livy_conn_id='livyunittest')
        operator = LivyOperator(file='sparkapp', dag=self.dag, task_id='livy_example')
        # Prime the operator's private hook cache with our instance.
        operator._livy_hook = injected
        assert operator.get_hook() == injected
예제 #2
0
    def test_execution_with_extra_options(self, mock_post):
        """extra_options given to the operator must be forwarded to its hook."""
        opts = {'check_response': True}
        operator = LivyOperator(
            file='sparkapp',
            dag=self.dag,
            task_id='livy_example',
            extra_options=opts,
        )

        operator.execute(context={})

        # The hook created during execute() must carry the same options.
        assert operator.get_hook().extra_options == opts
예제 #3
0
    def test_execution(self, mock_post, mock_get):
        """Submitting a batch posts only the set kwargs and then polls the batch state.

        Consistency fix: sibling tests in this class use bare ``assert``
        statements, while this one used ``self.assertEqual`` — unified here.
        """
        task = LivyOperator(livy_conn_id='livyunittest',
                            file='sparkapp',
                            polling_interval=1,
                            dag=self.dag,
                            task_id='livy_example')
        task.execute(context={})

        # Only 'file' was set on the operator, so every other kwarg posted
        # to Livy must be falsy; strip them before comparing.
        call_args = {k: v for k, v in mock_post.call_args[1].items() if v}
        assert call_args == {'file': 'sparkapp'}
        # The state of the created batch must have been fetched exactly once.
        mock_get.assert_called_once_with(BATCH_ID)
예제 #4
0
    def test_poll_for_termination_fail(self, mock_livy):
        """Polling must stop on the first terminal (ERROR) state and raise."""

        # Two RUNNING polls followed by ERROR; any call beyond that means
        # the operator failed to stop right at the terminal state.
        states = iter([BatchState.RUNNING, BatchState.RUNNING, BatchState.ERROR])

        def fake_state(_):
            try:
                return next(states)
            except StopIteration:
                # fail if does not stop right before
                raise AssertionError()

        mock_livy.side_effect = fake_state

        task = LivyOperator(
            file='sparkapp',
            polling_interval=1,
            dag=self.dag,
            task_id='livy_example'
        )
        task._livy_hook = task.get_hook()

        # The ERROR terminal state must surface as an AirflowException.
        with self.assertRaises(AirflowException):
            task.poll_for_termination(BATCH_ID)

        mock_livy.assert_called_with(BATCH_ID)
        # Exactly three polls: RUNNING, RUNNING, ERROR.
        self.assertEqual(mock_livy.call_count, 3)
예제 #5
0
    def test_deletion(self, mock_post, mock_delete):
        """Killing the task after submission must delete the created batch."""
        operator = LivyOperator(
            livy_conn_id='livyunittest',
            file='sparkapp',
            dag=self.dag,
            task_id='livy_example',
        )

        operator.execute(context={})
        operator.kill()

        # kill() must issue exactly one delete for the submitted batch.
        mock_delete.assert_called_once_with(BATCH_ID)
예제 #6
0
def _build_livy_operator(
        task: str,
        spark_conf_extra: Optional[Dict[Any, Any]] = None) -> LivyOperator:
    """Build a LivyOperator for one ETL task.

    The task name doubles as the Airflow task_id and as the ``--task``
    argument passed to the Spark entry point. Extra Spark conf entries,
    if given, override the base conf on key collisions.
    """
    # Base conf pins PySpark to the interpreter shipped in the venv archive.
    merged_conf = {
        "spark.yarn.appMasterEnv.PYSPARK_PYTHON": "./env/bin/python",
        **(spark_conf_extra or {}),
    }

    return LivyOperator(
        task_id=task,
        file=f"{ETL_CODE_LOCATION}/main.py",
        args=["--task", task, "--execution-date", "{{ ds }}"],
        archives=[f"{ETL_CODE_LOCATION}/venv_build.tar.gz#env"],
        conf=merged_conf,
        proxy_user=LIVY_PROXY_USER,
        livy_conn_id=LIVY_CONN_ID,
    )
예제 #7
0
    'depends_on_past': False
}

with DAG(
        dag_id='example_livy_operator',
        default_args=args,
        schedule_interval='@daily',
        start_date=days_ago(5),
) as dag:

    # Submit the packaged Scala/Java SparkPi example as a Livy batch.
    livy_java_task = LivyOperator(
        task_id='pi_java_task',
        dag=dag,
        livy_conn_id='livy_conn_default',
        file='/spark-examples.jar',
        class_name='org.apache.spark.examples.SparkPi',
        args=[10],
        num_executors=1,
        conf={'spark.shuffle.compress': 'false'},
    )

    # Submit the PySpark pi example, polling Livy every 60 seconds.
    livy_python_task = LivyOperator(
        task_id='pi_python_task',
        dag=dag,
        livy_conn_id='livy_conn_default',
        file='/pi.py',
        args=[10],
        polling_interval=60,
    )
예제 #8
0
from airflow import DAG
from datetime import timedelta
from airflow.utils.dates import days_ago
from airflow.operators.bash_operator import BashOperator
from airflow.operators.http_operator import SimpleHttpOperator
from airflow.providers.apache.livy.operators.livy import LivyOperator

# Arguments shared by every task in the DAG.
default_args = {
    'owner': 'airflow',
    'depends_on_past': False,
    'start_date': days_ago(2),
    'email': ['*****@*****.**'],
    'email_on_failure': False,
    'email_on_retry': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

dag = DAG(
    'livydag',
    default_args=default_args,
    description='Livy DAG',
    schedule_interval=timedelta(days=1),
)

# Submit the SparkPi example jar as a Livy batch; poll its state every second.
livy_batch = LivyOperator(
    dag=dag,
    task_id="livy_batch",
    file="/opt/jars/spark-examples_2.11-2.4.6.jar",
    class_name="org.apache.spark.examples.SparkPi",
    polling_interval=1,
)
예제 #9
0
        default_args=default_args,
        description='A simple test livy DAG',
        schedule_interval=None,
        start_date=days_ago(0),
        tags=['test'],
) as dag:
    # DAG-level documentation shown in the Airflow UI (runtime string,
    # intentionally left in its original language).
    dag.doc_md = dedent("""\
    Тестовый WF с вызовом простой трансформации
    """)

    # Submit the autogenerated transformation jar from S3 through Livy;
    # job parameters are passed as KEY=VALUE strings in args.
    aws_test = LivyOperator(
        task_id='aws_test2',
        dag=dag,
        livy_conn_id='livy_default',
        proxy_user='******',
        file=(
            's3a://datagram/user/root/deployments/autogenerated_tr_aws_test_2/'
            'ru.neoflex.meta.etl2.spark.aws_test_2-1.0-SNAPSHOT.jar'
        ),
        class_name='ru.neoflex.meta.etl2.spark.aws_test_2Job',
        args=[
            'HOME=/user', 'USER=root', 'WF_HOME=/user/root',
            'ROOT_WORKFLOW_ID=aws_test_2_729508210',
            'CURRENT_WORKFLOW_ID=aws_test_2_729508210', 'SLIDE_SIZE=400',
            'FETCH_SIZE=1000', 'PARTITION_NUM=1', 'FAIL_THRESHOLD=1000',
            'DEBUG=true', 'MASTER='
        ],
        num_executors=1,
        conf={'spark.shuffle.compress': 'false'},
        polling_interval=5,
    )