def my_pipeline():
    spec = {
        "name": "test-job",
        "new_cluster": {
            "spark_version": "5.3.x-scala2.11",
            "node_type_id": "Standard_D3_v2",
            "num_workers": 2
        },
        "libraries": [
            {"jar": "dbfs:/my-jar.jar"},
            {"maven": {"coordinates": "org.jsoup:jsoup:1.7.2"}}
        ],
        "timeout_seconds": 3600,
        "max_retries": 1,
        "schedule": {
            "quartz_cron_expression": "0 15 22 ? * *",
            "timezone_id": "America/Los_Angeles"
        },
        "spark_jar_task": {
            "main_class_name": "com.databricks.ComputeModels"
        }
    }

    res = CreateJobOp(name="createjob", spec=spec)

    self.assert_res(res, spec)
def my_pipeline(): job_name = "test-job" existing_cluster_id = "1201-my-cluster" schedule = { "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" } notebook_task = { "notebook_path": "/Users/[email protected]/my-notebook" } timeout_seconds = 120 expected_spec = { "name": job_name, "existing_cluster_id": existing_cluster_id, "schedule": schedule, "notebook_task": notebook_task, "timeout_seconds": timeout_seconds } res = CreateJobOp(name="createjob", job_name=job_name, existing_cluster_id=existing_cluster_id, schedule=schedule, notebook_task=notebook_task, timeout_seconds=timeout_seconds) self.assert_res(res, expected_spec)
def my_pipeline(): job_name = "test-job" new_cluster = { "spark_version": "5.3.x-scala2.11", "node_type_id": "Standard_D3_v2", "num_workers": 2 } schedule = { "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" } spark_submit_task = { "parameters": [ "--class", "org.apache.spark.examples.SparkPi", "dbfs:/docs/sparkpi.jar", "10" ] } expected_spec = { "name": job_name, "new_cluster": new_cluster, "schedule": schedule, "spark_submit_task": spark_submit_task } res = CreateJobOp(name="createjob", job_name=job_name, new_cluster=new_cluster, schedule=schedule, spark_submit_task=spark_submit_task) self.assert_res(res, expected_spec)
def my_pipeline(): job_name = "test-job" new_cluster = { "spark_version": "5.3.x-scala2.11", "node_type_id": "Standard_D3_v2", "num_workers": 2 } libraries = [{ "jar": "dbfs:/my-jar.jar" }, { "maven": { "coordinates": "org.jsoup:jsoup:1.7.2" } }] timeout_seconds = 3600 max_retries = 1 schedule = { "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" } spark_jar_task = { "main_class_name": "com.databricks.ComputeModels" } expected_spec = { "name": job_name, "new_cluster": new_cluster, "libraries": libraries, "timeout_seconds": timeout_seconds, "max_retries": max_retries, "schedule": schedule, "spark_jar_task": spark_jar_task } res = CreateJobOp(name="createjob", job_name=job_name, new_cluster=new_cluster, libraries=libraries, timeout_seconds=timeout_seconds, max_retries=max_retries, schedule=schedule, spark_jar_task=spark_jar_task) self.assert_res(res, expected_spec)
def my_pipeline(): job_name = "test-job" new_cluster = { "spark_version": "5.3.x-scala2.11", "node_type_id": "Standard_D3_v2", "num_workers": 2 } timeout_seconds = 3600 max_retries = 3 min_retry_interval_millis = 3600 retry_on_timeout = True schedule = { "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" } spark_python_task = { "python_file": "dbfs:/docs/pi.py", "parameters": ["10"] } expected_spec = { "name": job_name, "new_cluster": new_cluster, "timeout_seconds": timeout_seconds, "max_retries": max_retries, "min_retry_interval_millis": min_retry_interval_millis, "retry_on_timeout": retry_on_timeout, "schedule": schedule, "spark_python_task": spark_python_task } res = CreateJobOp( name="createjob", job_name=job_name, new_cluster=new_cluster, timeout_seconds=timeout_seconds, max_retries=max_retries, min_retry_interval_millis=min_retry_interval_millis, retry_on_timeout=retry_on_timeout, schedule=schedule, spark_python_task=spark_python_task) self.assert_res(res, expected_spec)
def my_pipeline(): job_name = "test-job" current_path = Path(__file__).parent json_spec_file_name = current_path.joinpath("job_spec.json") expected_spec = { "name": job_name, "new_cluster": { "spark_version": "5.3.x-scala2.11", "node_type_id": "Standard_D3_v2", "num_workers": 2 }, "libraries": [{ "jar": "dbfs:/my-jar.jar" }, { "maven": { "coordinates": "org.jsoup:jsoup:1.7.2" } }], "timeout_seconds": 3600, "max_retries": 1, "schedule": { "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" }, "spark_jar_task": { "main_class_name": "com.databricks.ComputeModels" } } res = CreateJobOp.from_file_name(name="createjob", job_name=job_name, file_name=json_spec_file_name) self.assert_res(res, expected_spec)
def my_pipeline(): CreateJobOp(name="createjob", new_cluster={ "spark_version": "5.3.x-scala2.11", "node_type_id": "Standard_D3_v2", "num_workers": 2 }, libraries=[{ "jar": "dbfs:/my-jar.jar" }, { "maven": { "coordinates": "org.jsoup:jsoup:1.7.2" } }], timeout_seconds=3600, max_retries=1, schedule={ "quartz_cron_expression": "0 15 22 ? * *", "timezone_id": "America/Los_Angeles" }, spark_jar_task={ "main_class_name": "com.databricks.ComputeModels" })