Example #1
from typing import Tuple

from azureml.core import Dataset, Workspace
from azureml.core.compute import ComputeTarget
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import DatabricksStep

# Module-level workspace handle used by the step (defined elsewhere in the source project).
WS = Workspace.from_config()


def create_databricks_step(
        input_dataset: Dataset, compute: ComputeTarget,
        debug_run: bool) -> Tuple[DatabricksStep, PipelineData]:
    """
    Creates the "Convert to Parquet" Databricks step
    """
    output_data = PipelineData(name="ParquetFiles",
                               datastore=WS.get_default_datastore(),
                               is_directory=True)

    node_size = 'Standard_DS4_v2'
    spark_version = '7.3.x-cpu-ml-scala2.12'

    db_step = DatabricksStep(
        name='Convert to Parquet',
        inputs=[input_dataset.as_named_input("CSVFiles")],
        outputs=[output_data],
        source_directory="./safe-driver/prep_data",
        python_script_name='prep_data.py',
        python_script_params=["--number-of-files",
                              "1"],  # Set the number of output files to 1
        num_workers=1,
        compute_target=compute,
        pypi_libraries=[],
        allow_reuse=debug_run,
        node_type=node_size,
        spark_version=spark_version,
    )

    return db_step, output_data
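
For context, a minimal sketch of how this factory function might be wired into a pipeline run; the dataset, compute, and experiment names here are assumptions, not taken from the source project:

from azureml.core import Experiment
from azureml.pipeline.core import Pipeline

db_step, parquet_files = create_databricks_step(
    input_dataset=csv_dataset,      # assumed: a registered Dataset of CSV files
    compute=databricks_compute,     # assumed: an attached Databricks compute target
    debug_run=False)
pipeline = Pipeline(workspace=WS, steps=[db_step])
run = Experiment(WS, "safe-driver").submit(pipeline)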
Example #2
from typing import Tuple

from azureml.core import Dataset, Workspace
from azureml.core.compute import ComputeTarget
from azureml.core.runconfig import RunConfiguration
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

# Module-level workspace handle and run configuration used by the step
# (both defined elsewhere in the source project).
WS = Workspace.from_config()
RC = RunConfiguration()


def create_evaluate_model_step(
        model_metadata_folder: PipelineData, compute: ComputeTarget,
        validation_data: Dataset,
        debug_run: bool) -> Tuple[PythonScriptStep, PipelineData]:
    """
    Creates the "Evaluate Model" step
    """
    output_folder = "./outputs"
    output_data = PipelineData(name="RegisterModel",
                               datastore=WS.get_default_datastore(),
                               is_directory=True,
                               output_path_on_compute=output_folder,
                               output_mode="upload")

    eval_step = PythonScriptStep(
        name="Evaluate Model",
        script_name="evaluate.py",
        source_directory='./safe-driver/evaluate/',
        compute_target=compute,
        inputs=[model_metadata_folder],
        outputs=[output_data],
        arguments=[
            "--model-metadata",
            model_metadata_folder.as_mount(), "--register-model-folder",
            output_folder, "--validation-data",
            validation_data.as_named_input("ValidationData").as_mount()
        ],
        allow_reuse=debug_run,
        runconfig=RC)

    return eval_step, output_data
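
As with the previous example, the returned step plugs into a Pipeline. A sketch of the call; the training-step output, compute target, and dataset names below are hypothetical placeholders:

from azureml.pipeline.core import Pipeline

eval_step, register_folder = create_evaluate_model_step(
    model_metadata_folder=model_metadata,  # hypothetical: PipelineData produced by a training step
    compute=cpu_cluster,                   # hypothetical: an AmlCompute target
    validation_data=validation_dataset,    # hypothetical: a registered validation Dataset
    debug_run=False)
pipeline = Pipeline(workspace=WS, steps=[eval_step])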
Example #3
from azureml.core import Dataset, Workspace
from azureml.pipeline.core import PipelineData
from azureml.pipeline.steps import PythonScriptStep

ws = Workspace.from_config()

df = Dataset.get_by_name(ws, "my_dataset")
ds = ws.get_default_datastore()

prepped_data = PipelineData("prepped", ds)

step1 = PythonScriptStep(name="prepare_data",
                         source_directory="Scripts",
                         script_name="prepare.py",
                         compute_target="my_cluster1",
                         arguments=['--raw_data', df.as_named_input('raw_data'),
                                    '--out_folder', prepped_data],
                         outputs=[prepped_data])


step2 = PythonScriptStep(name="training_data",
                         source_directory="Scripts",
                         script_name="training.py",
                         compute_target="my_cluster2",
                         arguments=['--in_folder', prepped_data],
                         inputs=[prepped_data])


from azureml.pipeline.core import Pipeline

# step2 runs after step1 because it consumes step1's prepped_data output.
pipeline = Pipeline(workspace=ws, steps=[step1, step2])
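
To actually run the pipeline, it is submitted as an experiment; a minimal sketch, where "prep-and-train" is an assumed experiment name:

from azureml.core import Experiment

run = Experiment(ws, "prep-and-train").submit(pipeline)
run.wait_for_completion(show_output=True)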