def test_wait(self, mock_hook):
    job = self.create_job(JobStatus.RUNNING)
    job_id = "job_id"
    mock_hook.return_value.get_job.return_value = job

    sensor = DataprocJobSensor(
        task_id=TASK_ID,
        location=GCP_LOCATION,
        project_id=GCP_PROJECT,
        dataproc_job_id=job_id,
        gcp_conn_id=GCP_CONN_ID,
        timeout=TIMEOUT,
    )
    ret = sensor.poke(context={})

    mock_hook.return_value.get_job.assert_called_once_with(
        job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT
    )
    assert not ret
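# These tests call self.create_job(...), a helper on the test class that is
# not shown in this excerpt. A minimal sketch of what such a helper might
# look like (an assumption, not the actual implementation): it builds a mock
# Job whose status.state carries the requested JobStatus, which is what
# DataprocJobSensor.poke inspects.
from unittest import mock

def create_job(self, state):
    # Hypothetical helper: a mock Job exposing only job.status.state.
    job = mock.Mock()
    job.status = mock.Mock()
    job.status.state = state
    return job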
def test_cancelled(self, mock_hook):
    job = self.create_job(JobStatus.CANCELLED)
    job_id = "job_id"
    mock_hook.return_value.get_job.return_value = job

    sensor = DataprocJobSensor(
        task_id=TASK_ID,
        location=GCP_LOCATION,
        project_id=GCP_PROJECT,
        dataproc_job_id=job_id,
        gcp_conn_id=GCP_CONN_ID,
        timeout=TIMEOUT,
    )
    with pytest.raises(AirflowException, match="Job was cancelled"):
        sensor.poke(context={})

    mock_hook.return_value.get_job.assert_called_once_with(
        job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT
    )
def test_error(self, mock_hook):
    job = self.create_job(JobStatus.ERROR)
    job_id = "job_id"
    mock_hook.return_value.get_job.return_value = job

    sensor = DataprocJobSensor(
        task_id=TASK_ID,
        location=GCP_LOCATION,
        project_id=GCP_PROJECT,
        dataproc_job_id=job_id,
        gcp_conn_id=GCP_CONN_ID,
        timeout=TIMEOUT,
    )
    # Use pytest.raises for consistency with test_cancelled above.
    with pytest.raises(AirflowException, match="Job failed"):
        sensor.poke(context={})

    mock_hook.return_value.get_job.assert_called_once_with(
        job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT
    )
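# For completeness, a hedged sketch of the success-state counterpart to the
# tests above, assuming poke() returns True once the job reaches
# JobStatus.DONE (the fixture names reuse those already used above):
def test_done(self, mock_hook):
    job = self.create_job(JobStatus.DONE)
    job_id = "job_id"
    mock_hook.return_value.get_job.return_value = job

    sensor = DataprocJobSensor(
        task_id=TASK_ID,
        location=GCP_LOCATION,
        project_id=GCP_PROJECT,
        dataproc_job_id=job_id,
        gcp_conn_id=GCP_CONN_ID,
        timeout=TIMEOUT,
    )
    ret = sensor.poke(context={})

    mock_hook.return_value.get_job.assert_called_once_with(
        job_id=job_id, location=GCP_LOCATION, project_id=GCP_PROJECT
    )
    assert ret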
spark_task = DataprocSubmitJobOperator(
    task_id="spark_task", job=SPARK_JOB, region=REGION, project_id=PROJECT_ID
)

# [START cloud_dataproc_async_submit_sensor]
spark_task_async = DataprocSubmitJobOperator(
    task_id="spark_task_async", job=SPARK_JOB, region=REGION, project_id=PROJECT_ID, asynchronous=True
)
spark_task_async_sensor = DataprocJobSensor(
    task_id='spark_task_async_sensor_task',
    region=REGION,
    project_id=PROJECT_ID,
    dataproc_job_id=spark_task_async.output,
    poke_interval=10,
)
# [END cloud_dataproc_async_submit_sensor]

# [START how_to_cloud_dataproc_submit_job_to_cluster_operator]
pyspark_task = DataprocSubmitJobOperator(
    task_id="pyspark_task", job=PYSPARK_JOB, region=REGION, project_id=PROJECT_ID
)
# [END how_to_cloud_dataproc_submit_job_to_cluster_operator]

sparkr_task = DataprocSubmitJobOperator(
    task_id="sparkr_task", job=SPARKR_JOB, region=REGION,
spark_task = DataprocSubmitJobOperator(
    task_id="spark_task", job=SPARK_JOB, location=REGION, project_id=PROJECT_ID
)

# [START cloud_dataproc_async_submit_sensor]
spark_task_async = DataprocSubmitJobOperator(
    task_id="spark_task_async", job=SPARK_JOB, location=REGION, project_id=PROJECT_ID, asynchronous=True
)
spark_task_async_sensor = DataprocJobSensor(
    task_id='spark_task_async_sensor_task',
    location=REGION,
    project_id=PROJECT_ID,
    dataproc_job_id="{{task_instance.xcom_pull(task_ids='spark_task_async')}}",
    poke_interval=10,
)
# [END cloud_dataproc_async_submit_sensor]

# [START how_to_cloud_dataproc_submit_job_to_cluster_operator]
pyspark_task = DataprocSubmitJobOperator(
    task_id="pyspark_task", job=PYSPARK_JOB, location=REGION, project_id=PROJECT_ID
)
# [END how_to_cloud_dataproc_submit_job_to_cluster_operator]

sparkr_task = DataprocSubmitJobOperator(
    task_id="sparkr_task", job=SPARKR_JOB, location=REGION,
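# In the asynchronous pattern above, the sensor reads the submitted job id
# from XCom. With the spark_task_async.output (XComArg) form the dependency
# between the two tasks is created implicitly; when the job id is pulled via
# a Jinja xcom_pull template, as in the second variant, the ordering would
# need to be set explicitly. A hedged sketch (the actual example DAG may
# wire its tasks differently):
spark_task_async >> spark_task_async_sensor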