def my_pipeline():
    """Submit a run that executes a spark-submit task on a freshly created cluster."""
    run_name = "test-run"
    cluster_spec = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2,
    }
    submit_task = {
        "parameters": [
            "--class",
            "org.apache.spark.examples.SparkPi",
            "dbfs:/docs/sparkpi.jar",
            "10",
        ]
    }
    # The op is expected to reproduce exactly the spec it was built from.
    expected_spec = {
        "run_name": run_name,
        "new_cluster": cluster_spec,
        "spark_submit_task": submit_task,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        new_cluster=cluster_spec,
        spark_submit_task=submit_task,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Build a run op from a JSON spec file and check it matches the expected spec."""
    run_name = "test-run"
    # run_spec.json lives next to this test module.
    spec_dir = Path(__file__).parent
    spec_path = spec_dir.joinpath("run_spec.json")
    expected_spec = {
        "run_name": run_name,
        "new_cluster": {
            "spark_version": "5.3.x-scala2.11",
            "node_type_id": "Standard_D3_v2",
            "num_workers": 2,
        },
        "spark_submit_task": {
            "parameters": [
                "--class",
                "org.apache.spark.examples.SparkPi",
                "dbfs:/docs/sparkpi.jar",
                "10",
            ]
        },
    }
    res = SubmitRunOp.from_file_name(
        name="submitrun",
        run_name=run_name,
        file_name=spec_path,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Submit a spark-jar run with attached libraries on a new cluster."""
    run_name = "test-run"
    cluster_spec = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2,
    }
    # One DBFS jar plus one Maven coordinate, to cover both library kinds.
    library_list = [
        {"jar": "dbfs:/my-jar.jar"},
        {"maven": {"coordinates": "org.jsoup:jsoup:1.7.2"}},
    ]
    jar_task = {"main_class_name": "com.databricks.ComputeModels"}
    expected_spec = {
        "run_name": run_name,
        "new_cluster": cluster_spec,
        "libraries": library_list,
        "spark_jar_task": jar_task,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        new_cluster=cluster_spec,
        libraries=library_list,
        spark_jar_task=jar_task,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Submit a run that executes a Python file task on a new cluster."""
    run_name = "test-run"
    cluster_spec = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2,
    }
    python_task = {
        "python_file": "dbfs:/docs/pi.py",
        "parameters": ["10"],
    }
    expected_spec = {
        "run_name": run_name,
        "new_cluster": cluster_spec,
        "spark_python_task": python_task,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        new_cluster=cluster_spec,
        spark_python_task=python_task,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Create a spark-jar submit-run op; no assertion, used as a compile-only pipeline."""
    cluster_spec = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2,
    }
    library_list = [
        {"jar": "dbfs:/my-jar.jar"},
        {"maven": {"coordinates": "org.jsoup:jsoup:1.7.2"}},
    ]
    jar_task = {"main_class_name": "com.databricks.ComputeModels"}
    SubmitRunOp(
        name="submitrun",
        new_cluster=cluster_spec,
        libraries=library_list,
        spark_jar_task=jar_task,
    )
def my_pipeline():
    """Run an existing job by name, passing positional python_params."""
    run_name = "test-run"
    job_name = "test-job"
    params = ["john doe", "35"]
    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "python_params": params,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        job_name=job_name,
        python_params=params,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Run an existing job by name, passing positional jar_params."""
    run_name = "test-run"
    job_name = "test-job"
    params = ["param1", "param2"]
    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "jar_params": params,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        job_name=job_name,
        jar_params=params,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Run an existing job by name, passing spark_submit_params."""
    run_name = "test-run"
    job_name = "test-job"
    params = [
        "--class",
        "org.apache.spark.examples.SparkPi",
    ]
    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "spark_submit_params": params,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        job_name=job_name,
        spark_submit_params=params,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Run an existing job by name, passing key/value notebook_params."""
    run_name = "test-run"
    job_name = "test-job"
    params = {
        "dry-run": "true",
        "oldest-time-to-consider": "1457570074236",
    }
    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "notebook_params": params,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        job_name=job_name,
        notebook_params=params,
    )
    self.assert_res(res, expected_spec)
def my_pipeline():
    """Submit a notebook task on an existing cluster with a run timeout."""
    run_name = "test-run"
    cluster_id = "1201-my-cluster"
    notebook = {"notebook_path": "/Users/[email protected]/my-notebook"}
    timeout = 120
    expected_spec = {
        "run_name": run_name,
        "existing_cluster_id": cluster_id,
        "notebook_task": notebook,
        "timeout_seconds": timeout,
    }
    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        existing_cluster_id=cluster_id,
        notebook_task=notebook,
        timeout_seconds=timeout,
    )
    self.assert_res(res, expected_spec)