Example #1
import json

from azureml.core import Environment, Model, Workspace

# MoveModelConfig and the MODEL_JSON / FINAL_MODEL_FOLDER /
# FINAL_ENSEMBLE_MODEL_FOLDER constants come from the surrounding
# InnerEye module.
def upload_model(ws: Workspace, config: MoveModelConfig) -> Model:
    """
    Uploads an InnerEye model to an AzureML workspace.

    :param ws: The AzureML workspace.
    :param config: The move-model configuration with the local model and environment paths.
    :return: The registered Model.
    """
    model_path, environment_path = config.get_paths()
    with open(model_path / MODEL_JSON, 'r') as f:
        model_dict = json.load(f)

    # Find the folder containing the final model.
    final_model_path = model_path / FINAL_MODEL_FOLDER
    full_model_path = (final_model_path if final_model_path.exists()
                       else model_path / FINAL_ENSEMBLE_MODEL_FOLDER)

    new_model = Model.register(ws,
                               model_path=str(full_model_path),
                               model_name=model_dict['name'],
                               tags=model_dict['tags'],
                               properties=model_dict['properties'],
                               description=model_dict['description'])
    env = Environment.load_from_directory(str(environment_path))
    env.register(workspace=ws)
    print(f"Environment {env.name} registered")
    return new_model
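A minimal usage sketch for the function above, assuming the InnerEye MoveModelConfig and a workspace config.json on disk; the constructor arguments shown are an assumption based on how config.get_paths() is used above, and the model_id and path values are placeholders:

from azureml.core import Workspace

# Hypothetical usage; MoveModelConfig's constructor arguments are assumed.
ws = Workspace.from_config()  # reads workspace details from config.json
config = MoveModelConfig(model_id="MyModel:1", path="downloaded_model",
                         action="upload")
model = upload_model(ws, config)
print(f"Registered {model.name}:{model.version}")
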
Example #2
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    # Make sure to include `r-essentials`
    #   in diabetes_regression/conda_dependencies.yml
    environment = Environment.load_from_directory(e.sources_directory_train)
    if e.collection_uri is not None and e.teamproject_name is not None:
        builduri_base = (e.collection_uri + e.teamproject_name
                         + "/_build/results?buildId=")
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    train_step = PythonScriptStep(
        name="Train Model",
        script_name="train_with_r.py",
        compute_target=aml_compute,
        source_directory="diabetes_regression/training/R",
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    steps = [train_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline {published_pipeline.name} '
          f'for build {published_pipeline.version}')
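Once published, the pipeline can be looked up by id and submitted as an experiment run; a minimal sketch, assuming the variables from main() above are in scope and using a placeholder experiment name:

from azureml.pipeline.core import PublishedPipeline

# Fetch the pipeline published above and trigger one run of it.
pipeline = PublishedPipeline.get(workspace=aml_workspace,
                                 id=published_pipeline.id)
run = pipeline.submit(workspace=aml_workspace,
                      experiment_name="diabetes-train")  # placeholder name
run.wait_for_completion(show_output=True)
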
Example #3
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(name=e.workspace_name,
                                  subscription_id=e.subscription_id,
                                  resource_group=e.resource_group)
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(aml_workspace, e.compute_name, e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    environment = Environment.load_from_directory(e.sources_directory_train)
    if e.collection_uri is not None and e.teamproject_name is not None:
        builduri_base = (e.collection_uri + e.teamproject_name
                         + "/_build/results?buildId=")
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=e.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=e.build_id)

    # Get dataset name
    dataset_name = e.dataset_name

    # Check to see if dataset exists
    if dataset_name not in aml_workspace.datasets:
        # Create dataset from diabetes sample data
        sample_data = load_diabetes()
        df = pd.DataFrame(data=sample_data.data,
                          columns=sample_data.feature_names)
        df['Y'] = sample_data.target
        file_name = 'diabetes.csv'
        df.to_csv(file_name, index=False)

        # Upload file to default datastore in workspace
        default_ds = aml_workspace.get_default_datastore()
        target_path = 'training-data/'
        default_ds.upload_files(files=[file_name],
                                target_path=target_path,
                                overwrite=True,
                                show_progress=False)

        # Register dataset
        path_on_datastore = os.path.join(target_path, file_name)
        dataset = Dataset.Tabular.from_delimited_files(
            path=(default_ds, path_on_datastore))
        dataset = dataset.register(workspace=aml_workspace,
                                   name=dataset_name,
                                   description='diabetes training data',
                                   tags={'format': 'CSV'},
                                   create_new_version=True)

    # Get the dataset
    dataset = Dataset.get_by_name(aml_workspace, dataset_name)

    # Create a PipelineData to pass data between steps
    pipeline_data = PipelineData(
        'pipeline_data', datastore=aml_workspace.get_default_datastore())

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[dataset.as_named_input('training_data')],
        outputs=[pipeline_data],
        arguments=[
            "--build_id", build_id_param, "--model_name", model_name_param,
            "--step_output", pipeline_data
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--allow_run_cancel",
            e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        inputs=[pipeline_data],
        arguments=[
            "--build_id",
            build_id_param,
            "--model_name",
            model_name_param,
            "--step_input",
            pipeline_data,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id)
    print(f'Published pipeline {published_pipeline.name} '
          f'for build {published_pipeline.version}')
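Inside the training script, the dataset wired in via as_named_input('training_data') above can be read back from the run context; a minimal sketch of that consumer side:

from azureml.core import Run

run = Run.get_context()
# 'training_data' matches the name passed to as_named_input() above.
df = run.input_datasets['training_data'].to_pandas_dataframe()
print(df.head())
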
Example #4
def main():
    env = EnvironmentVariables()
    args = add_arguments()

    workspace = get_workspace()

    cpu_cluster_name = env.cpu_cluster_name
    compute = get_or_create_compute(workspace, cpu_cluster_name,
                                    env.compute_vm_size, env.max_nodes)

    environment = Environment.load_from_directory(env.sources_directory_train)
    environment.register(workspace)
    run_configuration = RunConfiguration()
    run_configuration.environment = environment

    model_name_param = PipelineParameter(name="model_name",
                                         default_value=env.model_name)
    build_id_param = PipelineParameter(name="build_id",
                                       default_value=env.build_id)
    should_tune_hyperparameters_param = PipelineParameter(
        name="should_tune_hyperparameters",
        default_value=env.should_tune_hyperparameters)
    parallelism_level_param = PipelineParameter(
        name="parallelism_level", default_value=env.parallelism_level)
    force_register_param = PipelineParameter(name="force_register",
                                             default_value=env.force_register)

    datastore = get_datastore()

    dataset_name = env.dataset_name
    dataset_path = env.dataset_path
    print(
        f"Creating new dataset version for {dataset_name} in datastore {datastore} from file {dataset_path}"
    )
    temp_dataset = Dataset.Tabular.from_delimited_files(path=[(datastore,
                                                               dataset_path)])
    dataset = temp_dataset.register(workspace=workspace,
                                    name=dataset_name,
                                    description=dataset_name,
                                    tags={'format': 'CSV'},
                                    create_new_version=True)

    train_output = PipelineData('train_output',
                                output_name='train_output',
                                datastore=datastore)

    train_step = PythonScriptStep(
        name="Train model",
        compute_target=compute,
        script_name=env.train_script_name,
        runconfig=run_configuration,
        inputs=[dataset.as_named_input('training')],
        outputs=[train_output],
        arguments=[
            "--build_id", build_id_param, "--model_name", model_name_param,
            "--parallelism_level", parallelism_level_param,
            "--should_tune_hyperparameters", should_tune_hyperparameters_param
        ],
        allow_reuse=False)

    evaluate_step = PythonScriptStep(name="Evaluate model",
                                     compute_target=compute,
                                     script_name=env.evaluate_script_name,
                                     runconfig=run_configuration,
                                     inputs=[train_output],
                                     arguments=[
                                         "--build_id", build_id_param,
                                         "--model_name", model_name_param,
                                         "--train_output", train_output,
                                         "--force_register",
                                         force_register_param
                                     ],
                                     allow_reuse=False)

    register_step = PythonScriptStep(name="Register model",
                                     compute_target=compute,
                                     script_name=env.register_script_name,
                                     runconfig=run_configuration,
                                     inputs=[train_output],
                                     arguments=[
                                         "--build_id", build_id_param,
                                         "--model_name", model_name_param,
                                         "--train_output", train_output
                                     ],
                                     allow_reuse=False)

    evaluate_step.run_after(train_step)
    register_step.run_after(evaluate_step)

    steps = [train_step, evaluate_step, register_step]

    train_pipeline = Pipeline(workspace=workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=env.pipeline_name,
        description="Train/Eval/Register if better pipeline",
        version=env.build_id)

    output_file_name = args.output_file_name
    if output_file_name:
        with open(output_file_name, "w") as output_file:
            output_file.write(published_pipeline.id)

    print(
        f"Published pipeline {published_pipeline.name} for build {published_pipeline.version}"
    )
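The pipeline id written to the output file can later be read back, e.g. by a release stage, to look the pipeline up again; a minimal sketch, assuming a workspace config.json on disk and treating the file name as a placeholder for args.output_file_name:

from azureml.core import Workspace
from azureml.pipeline.core import PublishedPipeline

workspace = Workspace.from_config()
with open("pipeline_id.txt") as f:  # placeholder for args.output_file_name
    pipeline_id = f.read().strip()
pipeline = PublishedPipeline.get(workspace=workspace, id=pipeline_id)
print(f"Found pipeline {pipeline.name}, version {pipeline.version}")
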
Example #5
def main():
    e = Env()
    # Get Azure machine learning workspace
    aml_workspace = Workspace.get(
        name=e.workspace_name,
        subscription_id=e.subscription_id,
        resource_group=e.resource_group
    )
    print("get_workspace:")
    print(aml_workspace)

    # Get Azure machine learning cluster
    aml_compute = get_compute(
        aml_workspace,
        e.compute_name,
        e.vm_size)
    if aml_compute is not None:
        print("aml_compute:")
        print(aml_compute)

    # Create a reusable run configuration environment
    # Read definition from diabetes_regression/azureml_environment.json
    environment = Environment.load_from_directory(e.sources_directory_train)
    if e.collection_uri is not None and e.teamproject_name is not None:
        builduri_base = (e.collection_uri + e.teamproject_name
                         + "/_build/results?buildId=")
        environment.environment_variables["BUILDURI_BASE"] = builduri_base
    environment.register(aml_workspace)

    run_config = RunConfiguration()
    run_config.environment = environment

    model_name_param = PipelineParameter(
        name="model_name", default_value=e.model_name)
    build_id_param = PipelineParameter(
        name="build_id", default_value=e.build_id)

    dataset_name = ""
    if e.datastore_name is not None and e.datafile_name is not None:
        dataset_name = e.dataset_name
        datastore = Datastore.get(aml_workspace, e.datastore_name)
        data_path = [(datastore, e.datafile_name)]
        dataset = Dataset.Tabular.from_delimited_files(path=data_path)
        dataset.register(workspace=aml_workspace,
                         name=e.dataset_name,
                         description="dataset with training data",
                         create_new_version=True)

    train_step = PythonScriptStep(
        name="Train Model",
        script_name=e.train_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--dataset_name", dataset_name,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Train created")

    evaluate_step = PythonScriptStep(
        name="Evaluate Model ",
        script_name=e.evaluate_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
            "--allow_run_cancel", e.allow_run_cancel,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Evaluate created")

    register_step = PythonScriptStep(
        name="Register Model ",
        script_name=e.register_script_path,
        compute_target=aml_compute,
        source_directory=e.sources_directory_train,
        arguments=[
            "--build_id", build_id_param,
            "--model_name", model_name_param,
        ],
        runconfig=run_config,
        allow_reuse=False,
    )
    print("Step Register created")
    # Check run_evaluation flag to include or exclude evaluation step.
    if e.run_evaluation.lower() == 'true':
        print("Include evaluation step before register step.")
        evaluate_step.run_after(train_step)
        register_step.run_after(evaluate_step)
        steps = [train_step, evaluate_step, register_step]
    else:
        print("Exclude evaluation step and directly run register step.")
        register_step.run_after(train_step)
        steps = [train_step, register_step]

    train_pipeline = Pipeline(workspace=aml_workspace, steps=steps)
    train_pipeline.validate()
    published_pipeline = train_pipeline.publish(
        name=e.pipeline_name,
        description="Model training/retraining pipeline",
        version=e.build_id
    )
    print(f'Published pipeline {published_pipeline.name} '
          f'for build {published_pipeline.version}')
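The PipelineParameter defaults defined above can be overridden per run at submission time; a minimal sketch, assuming the variables from main() are in scope and using placeholder experiment and parameter values:

# Override model_name and build_id for a single run of the published pipeline.
run = published_pipeline.submit(
    workspace=aml_workspace,
    experiment_name="diabetes-train",  # placeholder experiment name
    pipeline_parameters={"model_name": "diabetes-model",
                         "build_id": "20240101.1"})
run.wait_for_completion(show_output=True)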