Example #1
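Unit tests for SparkKubernetesOperator (method bodies only; the mock arguments imply @patch decorators for the KubernetesHook and the custom-object creation call, which are not shown in the excerpt). The first test checks that the namespace is taken from the Kubernetes connection; the second checks that a custom api_group and api_version are passed through to the CRD call.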
    def test_namespace_from_connection(self, mock_create_namespaced_crd,
                                       mock_kubernetes_hook):
        op = SparkKubernetesOperator(
            application_file=TEST_VALID_APPLICATION_JSON,
            dag=self.dag,
            kubernetes_conn_id='kubernetes_with_namespace',
            task_id='test_task_id')
        op.execute(None)
        mock_kubernetes_hook.assert_called_once_with()
        mock_create_namespaced_crd.assert_called_with(
            body=TEST_APPLICATION_DICT,
            group='sparkoperator.k8s.io',
            namespace='mock_namespace',
            plural='sparkapplications',
            version='v1beta2')

    def test_create_application_from_json_with_api_group_and_version(
            self, mock_create_namespaced_crd, mock_kubernetes_hook):
        api_group = 'sparkoperator.example.com'
        api_version = 'v1alpha1'
        op = SparkKubernetesOperator(
            application_file=TEST_VALID_APPLICATION_JSON,
            dag=self.dag,
            kubernetes_conn_id='kubernetes_default_kube_config',
            task_id='test_task_id',
            api_group=api_group,
            api_version=api_version,
        )
        op.execute(None)
        mock_kubernetes_hook.assert_called_once_with()
        mock_create_namespaced_crd.assert_called_with(
            body=TEST_APPLICATION_DICT,
            group=api_group,
            namespace='default',
            plural='sparkapplications',
            version=api_version,
        )
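The DAG snippets that follow reference default_args and provider imports that are not shown in the excerpts. A minimal sketch of that boilerplate, assuming the apache-airflow-providers-cncf-kubernetes package; the owner and retry values are illustrative only:

from datetime import timedelta

from airflow import DAG
from airflow.providers.cncf.kubernetes.operators.spark_kubernetes import SparkKubernetesOperator
from airflow.providers.cncf.kubernetes.sensors.spark_kubernetes import SparkKubernetesSensor
from airflow.utils.dates import days_ago

default_args = {
    'owner': 'airflow',            # illustrative value
    'depends_on_past': False,
    'start_date': days_ago(1),
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}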
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example']
)

# spark = open(
#     "example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="sampletenant",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
    api_group="sparkoperator.hpe.com"
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="sampletenant",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
    api_group="sparkoperator.hpe.com",
    attach_log=True
)
}
# [END default_args]

# [START instantiate_dag]

dag = DAG('spark_pi',
          default_args=default_args,
          description='submit spark-pi as sparkApplication on kubernetes',
          schedule_interval=None,
          start_date=days_ago(1),
          user_defined_macros={'json': json})

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="spark-work",
    application_file="example_spark_kubernetes_spark_pi.yaml",
    kubernetes_conn_id="kubernetes_default",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="spark-work",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_default",
    attach_log=True,
    dag=dag,
)
t1 >> t2
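Note: with do_xcom_push=True the submit task pushes the SparkApplication object returned by the Kubernetes API, so the sensor's Jinja template xcom_pull(...)['metadata']['name'] resolves to the generated application name at runtime; the exact XCom payload shape depends on the provider version.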
Example #5
}
# [END default_args]

# [START instantiate_dag]

dag = DAG('spark_pi',
          default_args=default_args,
          schedule_interval=timedelta(minutes=5),
          dagrun_timeout=timedelta(minutes=5),
          tags=['example'])

with open("example_spark_kubernetes_operator_spark_pi.yaml") as f:
    spark = f.read()

t1 = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file=spark,
    kubernetes_conn_id="kubernetes_default",
    do_xcom_push=True,
    dag=dag,
)

t2 = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_default",
    dag=dag)
t1 >> t2
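Unlike the other snippets, this example reads the YAML definition into a string and passes the content itself; application_file is a templated field, so either a path to a .yaml/.yml/.json file or the raw YAML/JSON definition is accepted.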
Example #6
default_args = {
    'owner': 'Matheus Jericó',
    'start_date': datetime(2021, 4, 1),
    'depends_on_past': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=10)
}

with DAG('minio-fifa-spark-operator',
         default_args=default_args,
         schedule_interval='@daily',
         tags=['development', 's3', 'minio', 'spark-operator']) as dag:

    etl_fifa_spark_operator = SparkKubernetesOperator(
        task_id='etl_fifa_spark_operator',
        namespace='processing',
        application_file='etl-fifa.yaml',
        kubernetes_conn_id='minikube',
        do_xcom_push=True)

    monitor_spark_app_status = SparkKubernetesSensor(
        task_id='monitor_spark_app_status',
        namespace="processing",
        application_name="{{ task_instance.xcom_pull(task_ids='etl_fifa_spark_operator')['metadata']['name'] }}",
        kubernetes_conn_id="minikube")

    delete_s3_file_raw_zone = S3DeleteObjectsOperator(
        task_id='delete_s3_file_raw_zone',
        bucket=RAW_ZONE,
        keys='data.csv',
        aws_conn_id='minio')
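    # The excerpt cuts off here; the original presumably chains the tasks,
    # for example (assumed, not part of the excerpt):
    etl_fifa_spark_operator >> monitor_spark_app_status >> delete_s3_file_raw_zone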
Example #7
dag = DAG(
    'spark_pi',
    default_args=default_args,
    schedule_interval=timedelta(days=1),
    tags=['example']
)

# spark = open(
#     "example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="spark-operator",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
    # api_group="sparkoperator.hpe.com",
    enable_impersonation_from_ldap_user=False
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="spark-operator",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag,
    # api_group="sparkoperator.hpe.com",
    attach_log=True
)
# [END default_args]

# [START instantiate_dag]

dag = DAG('spark_pi',
          default_args=default_args,
          schedule_interval=timedelta(days=1),
          tags=['example'])

# spark = open(
#     "example_spark_kubernetes_operator_pi.yaml").read()

submit = SparkKubernetesOperator(
    task_id='spark_pi_submit',
    namespace="mycspace",
    application_file="example_spark_kubernetes_operator_pi.yaml",
    kubernetes_conn_id="kubernetes_in_cluster",
    do_xcom_push=True,
    dag=dag,
)

sensor = SparkKubernetesSensor(
    task_id='spark_pi_monitor',
    namespace="mycspace",
    application_name="{{ task_instance.xcom_pull(task_ids='spark_pi_submit')['metadata']['name'] }}",
    kubernetes_conn_id="kubernetes_in_cluster",
    dag=dag)

submit >> sensor
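The wordcount snippet below starts mid-call and uses names that are not defined in the excerpt (the KubernetesPodOperator import, compute_resources, volume, volume_mount, spark_application_name). A minimal sketch of that setup, assuming a recent cncf.kubernetes provider where these parameters take kubernetes client models; all names and values are illustrative only:

from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from kubernetes.client import models as k8s

# Illustrative resource and volume definitions; adjust to the actual cluster and PVC.
compute_resources = k8s.V1ResourceRequirements(
    requests={'cpu': '500m', 'memory': '512Mi'},
    limits={'cpu': '1', 'memory': '1Gi'},
)
volume = k8s.V1Volume(
    name='wordcount-data',  # hypothetical PVC-backed volume
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='wordcount-pvc'),
)
volume_mount = k8s.V1VolumeMount(name='wordcount-data', mount_path='/mnt1')

# Assumed to match metadata.name in spark-wordcount.yaml.
spark_application_name = 'spark-wordcount'
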
    name="download_txtfile",
    namespace="airflow",
    image="cirrusci/wget",
    cmds=[
        "/bin/sh", "-c",
        "mkdir -p /mnt1/data &&  mkdir -p /mnt1/results && wget https://norvig.com/big.txt -O /mnt1/data/big.txt"
    ],
    task_id="download_txtfile",
    resources=compute_resources,
    volumes=[volume],
    volume_mounts=[volume_mount],
    get_logs=True,
    dag=dag)

spark_task = SparkKubernetesOperator(
    task_id="spark-wordcount",
    namespace="airflow",
    application_file="spark-wordcount.yaml",
    kubernetes_conn_id="kubernetes_default",
    dag=dag,
)

spark_sensor = SparkKubernetesSensor(task_id="spark-wordcount-monitor",
                                     namespace="airflow",
                                     application_name=spark_application_name,
                                     attach_log=True,
                                     kubernetes_conn_id="kubernetes_default",
                                     dag=dag)

download_txtfile >> spark_task >> spark_sensor
default_args = {
    "owner": "Airflow",
    "start_date": airflow.utils.dates.days_ago(1),
    "depends_on_past": False,
    "email_on_failure": False,
    "email_on_retry": False,
    "email": "*****@*****.**",
    "retries": 1,
    "retry_delay": timedelta(minutes=1)
}

with DAG(dag_id="ddt-spark-k8s-operator",
         schedule_interval="@hourly",
         default_args=default_args,
         catchup=False) as dag:
    t1 = SparkKubernetesOperator(
        task_id='stage_1_submit',
        namespace="ddt-compute",
        application_file="SparkApplication_stage_1.yaml",
        kubernetes_conn_id="kubernetes_default",
        do_xcom_push=True
    )
    t2 = SparkKubernetesSensor(
        task_id='stage_1_monitor',
        namespace="ddt-compute",
        application_name="{{ task_instance.xcom_pull(task_ids='stage_1_submit')['metadata']['name'] }}",
        kubernetes_conn_id="kubernetes_default",

    )
    t1 >> t2



# View the Spark operator logs
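If the intent of that last comment is to surface the Spark driver/operator output from Airflow, the earlier examples show SparkKubernetesSensor with attach_log=True, which appends the driver log to the sensor's task log; otherwise the logs can be read directly from the driver pod with kubectl logs.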