Example #1

def my_pipeline():
    run_name = "test-run"
    new_cluster = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2
    }
    spark_submit_task = {
        "parameters": [
            "--class", "org.apache.spark.examples.SparkPi",
            "dbfs:/docs/sparkpi.jar", "10"
        ]
    }

    expected_spec = {
        "run_name": run_name,
        "new_cluster": new_cluster,
        "spark_submit_task": spark_submit_task
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      new_cluster=new_cluster,
                      spark_submit_task=spark_submit_task)

    self.assert_res(res, expected_spec)
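
These snippets are excerpts from a test suite: each my_pipeline is nested inside a test method, which is why self.assert_res is in scope. A minimal sketch of that enclosing harness, assuming the import path for SubmitRunOp; the class name, test name, and helper body below are hypothetical:

import unittest

from databricks import SubmitRunOp  # assumed import path for the op


class SubmitRunOpTests(unittest.TestCase):  # hypothetical class name

    def assert_res(self, res, expected_spec):
        # Hypothetical stand-in: the real helper compares the run spec
        # generated by the op against expected_spec.
        self.assertIsNotNone(res)

    def test_submit_run(self):  # hypothetical test name
        def my_pipeline():
            ...  # any of the bodies shown in these examples

        my_pipeline()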

Example #2

from pathlib import Path  # needed for the file-path handling below

def my_pipeline():
    run_name = "test-run"
    current_path = Path(__file__).parent
    json_spec_file_name = current_path.joinpath("run_spec.json")

    expected_spec = {
        "run_name": run_name,
        "new_cluster": {
            "spark_version": "5.3.x-scala2.11",
            "node_type_id": "Standard_D3_v2",
            "num_workers": 2
        },
        "spark_submit_task": {
            "parameters": [
                "--class", "org.apache.spark.examples.SparkPi",
                "dbfs:/docs/sparkpi.jar", "10"
            ]
        }
    }

    res = SubmitRunOp.from_file_name(name="submitrun",
                                     run_name=run_name,
                                     file_name=json_spec_file_name)

    self.assert_res(res, expected_spec)
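
Example #2 expects a run_spec.json file next to the test module. A sketch of how that file could be generated so that it matches expected_spec above; producing it from Python like this is purely illustrative:

import json
from pathlib import Path

spec = {
    "run_name": "test-run",
    "new_cluster": {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2
    },
    "spark_submit_task": {
        "parameters": [
            "--class", "org.apache.spark.examples.SparkPi",
            "dbfs:/docs/sparkpi.jar", "10"
        ]
    }
}

# Place the file next to this script, matching the path the test builds.
Path(__file__).parent.joinpath("run_spec.json").write_text(json.dumps(spec, indent=4))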

Example #3

def my_pipeline():
    run_name = "test-run"
    new_cluster = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2
    }
    libraries = [{
        "jar": "dbfs:/my-jar.jar"
    }, {
        "maven": {
            "coordinates": "org.jsoup:jsoup:1.7.2"
        }
    }]
    spark_jar_task = {
        "main_class_name": "com.databricks.ComputeModels"
    }

    expected_spec = {
        "run_name": run_name,
        "new_cluster": new_cluster,
        "libraries": libraries,
        "spark_jar_task": spark_jar_task
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      new_cluster=new_cluster,
                      libraries=libraries,
                      spark_jar_task=spark_jar_task)

    self.assert_res(res, expected_spec)

Example #4

def my_pipeline():
    run_name = "test-run"
    new_cluster = {
        "spark_version": "5.3.x-scala2.11",
        "node_type_id": "Standard_D3_v2",
        "num_workers": 2
    }
    spark_python_task = {
        "python_file": "dbfs:/docs/pi.py",
        "parameters": [
            "10"
        ]
    }

    expected_spec = {
        "run_name": run_name,
        "new_cluster": new_cluster,
        "spark_python_task": spark_python_task
    }

    res = SubmitRunOp(
        name="submitrun",
        run_name=run_name,
        new_cluster=new_cluster,
        spark_python_task=spark_python_task
    )

    self.assert_res(res, expected_spec)

Example #5

def my_pipeline():
    SubmitRunOp(
        name="submitrun",
        new_cluster={
            "spark_version": "5.3.x-scala2.11",
            "node_type_id": "Standard_D3_v2",
            "num_workers": 2
        },
        libraries=[
            {"jar": "dbfs:/my-jar.jar"},
            {"maven": {"coordinates": "org.jsoup:jsoup:1.7.2"}}
        ],
        spark_jar_task={
            "main_class_name": "com.databricks.ComputeModels"
        }
    )
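
Example #5 is the only snippet written as a bare pipeline function rather than a test body. A minimal sketch of compiling such a function with the KFP SDK (v1-style API); the pipeline name, output file name, and SubmitRunOp import path are assumptions:

import kfp
from kfp import dsl

from databricks import SubmitRunOp  # assumed import path for the op


@dsl.pipeline(name="submit-run-pipeline")  # hypothetical pipeline name
def my_pipeline():
    SubmitRunOp(
        name="submitrun",
        new_cluster={
            "spark_version": "5.3.x-scala2.11",
            "node_type_id": "Standard_D3_v2",
            "num_workers": 2
        },
        libraries=[
            {"jar": "dbfs:/my-jar.jar"},
            {"maven": {"coordinates": "org.jsoup:jsoup:1.7.2"}}
        ],
        spark_jar_task={"main_class_name": "com.databricks.ComputeModels"}
    )


if __name__ == "__main__":
    # Emit a workflow file that can be uploaded to Kubeflow Pipelines.
    kfp.compiler.Compiler().compile(my_pipeline, "my_pipeline.yaml")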

Example #6

def my_pipeline():
    run_name = "test-run"
    job_name = "test-job"
    python_params = ["john doe", "35"]

    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "python_params": python_params
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      job_name=job_name,
                      python_params=python_params)

    self.assert_res(res, expected_spec)

Example #7

def my_pipeline():
    run_name = "test-run"
    job_name = "test-job"
    jar_params = ["param1", "param2"]

    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "jar_params": jar_params
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      job_name=job_name,
                      jar_params=jar_params)

    self.assert_res(res, expected_spec)

Example #8

def my_pipeline():
    run_name = "test-run"
    job_name = "test-job"
    spark_submit_params = [
        "--class", "org.apache.spark.examples.SparkPi"
    ]

    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "spark_submit_params": spark_submit_params
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      job_name=job_name,
                      spark_submit_params=spark_submit_params)

    self.assert_res(res, expected_spec)

Example #9

def my_pipeline():
    run_name = "test-run"
    job_name = "test-job"
    notebook_params = {
        "dry-run": "true",
        "oldest-time-to-consider": "1457570074236"
    }

    expected_spec = {
        "run_name": run_name,
        "job_name": job_name,
        "notebook_params": notebook_params
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      job_name=job_name,
                      notebook_params=notebook_params)

    self.assert_res(res, expected_spec)

Example #10

def my_pipeline():
    run_name = "test-run"
    existing_cluster_id = "1201-my-cluster"
    notebook_task = {
        "notebook_path": "/Users/[email protected]/my-notebook"
    }
    timeout_seconds = 120

    expected_spec = {
        "run_name": run_name,
        "existing_cluster_id": existing_cluster_id,
        "notebook_task": notebook_task,
        "timeout_seconds": timeout_seconds
    }

    res = SubmitRunOp(name="submitrun",
                      run_name=run_name,
                      existing_cluster_id=existing_cluster_id,
                      notebook_task=notebook_task,
                      timeout_seconds=timeout_seconds)

    self.assert_res(res, expected_spec)
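
Taken together, the examples cover three ways SubmitRunOp is invoked here: Examples #1 to #5 submit a one-time run with an inline or file-based spec (new_cluster plus a spark_submit_task, spark_jar_task, or spark_python_task), Examples #6 to #9 trigger an existing job by job_name with the matching parameter list (python_params, jar_params, spark_submit_params, or notebook_params), and Example #10 runs a notebook task on an existing interactive cluster with a timeout. The keyword arguments appear to mirror the fields of the Databricks Jobs REST payloads.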