# Example 1
def run_notebook(id, run_name=None, parameters: dict = None):  # noqa: E501
    """run_notebook

    Start a Kubeflow Pipelines run that executes the notebook with the given id.

    :param id: notebook id
    :type id: str
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to notebook name
    :type run_name: str
    :param parameters: optional run parameters, may be required based on pipeline definition
    :type parameters: dict

    :rtype: ApiRunCodeResponse
    """
    # consistency fix: guard against an unavailable KFP host, like every
    # other run_* handler in this module does
    if KFP_HOST == "UNAVAILABLE":
        return f"Kubeflow Pipeline host is 'UNAVAILABLE'", 503

    # explicit 'parameters' take precedence over the JSON request body
    if not parameters and connexion.request.is_json:
        parameter_dict = dict(connexion.request.get_json())  # noqa: E501
    else:
        parameter_dict = parameters

    api_notebook, status_code = get_notebook(id)

    if status_code > 200:
        return f"Notebook with id '{id}' does not exist", 404

    # TODO: Elyra kfp-notebook currently does not pass parameters on to papermill,
    #   so parameter validation stays disabled until that is supported:
    # parameter_errors, status_code = validate_parameters(api_notebook.parameters, parameter_dict)
    # if parameter_errors:
    #     return parameter_errors, status_code

    try:
        run_id = run_notebook_in_experiment(notebook=api_notebook,
                                            parameters=parameter_dict,
                                            run_name=run_name)

        # expected output notebook based on:
        #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L188-L190
        notebook_url = get_object_url(bucket_name="mlpipeline",
                                      prefix=f"notebooks/{api_notebook.id}/",
                                      file_extensions=[".ipynb"])

        # TODO: create a "sandboxed" notebook in a subfolder since Elyra overwrites
        #   the original notebook instead of creating an "-output.ipynb" file:
        #   https://github.com/elyra-ai/kfp-notebook/blob/c8f1298/etc/docker-scripts/bootstrapper.py#L205
        # For the time being, return a link to the generated output .html instead.
        notebook_output_html = notebook_url.replace(".ipynb", ".html")

        return ApiRunCodeResponse(
            run_url=f"/runs/details/{run_id}",
            run_output_location=notebook_output_html), 200
    except Exception as e:
        return f"Error while trying to run notebook {id}: {e}", 500
def run_component(id, parameters, run_name=None):  # noqa: E501
    """run_component

    Start a Kubeflow Pipelines run that executes the component with the given id.

    :param id: component id
    :type id: str
    :param parameters: run parameters for the component
    :type parameters: List[ApiParameter]
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to component name
    :type run_name: str

    :rtype: ApiRunCodeResponse
    """
    if KFP_HOST == "UNAVAILABLE":
        return f"Kubeflow Pipeline host is 'UNAVAILABLE'", 503

    # a JSON request body takes precedence over the 'parameters' argument
    if connexion.request.is_json:
        parameters = [ApiParameter.from_dict(d)
                      for d in connexion.request.get_json()]  # noqa: E501

    # keep only parameters carrying a non-blank value
    parameter_dict = {}
    for param in parameters:
        if param.value and param.value.strip() != "":
            parameter_dict[param.name] = param.value

    api_component, status_code = get_component(id)
    if status_code > 200:
        return f"Component with id '{id}' does not exist", 404

    parameter_errors, status_code = validate_parameters(
        api_component.parameters, parameter_dict)
    if parameter_errors:
        return parameter_errors, status_code

    api_template, _ = get_component_template(id)

    # component artifacts must be readable without credentials for KFP to pull them
    enable_anonymous_read_access(bucket_name="mlpipeline",
                                 prefix="components/*")

    try:
        run_id = run_component_in_experiment(api_component, api_template.url,
                                             parameter_dict, run_name)
        return ApiRunCodeResponse(run_url=f"/runs/details/{run_id}"), 200
    except Exception as e:
        return f"Error while trying to run component {id}: {e}", 500
def run_custom_pipeline(run_custom_pipeline_payload, run_name=None):  # noqa: E501
    """run_custom_pipeline

    Run a complex pipeline defined by a directed acyclic graph (DAG)

    :param run_custom_pipeline_payload: A custom pipeline defined by a directed acyclic graph (DAG) and input parameters
    :type run_custom_pipeline_payload: dict | bytes
    :param run_name: Name to identify the run on the Kubeflow Pipelines UI
    :type run_name: str

    :rtype: ApiRunCodeResponse
    """
    if connexion.request.is_json:
        run_custom_pipeline_payload = ApiPipelineCustomRunPayload.from_dict(connexion.request.get_json())  # noqa: E501

    run_parameters = run_custom_pipeline_payload.run_parameters or {}
    custom_pipeline = run_custom_pipeline_payload.custom_pipeline

    # NOTE: request validation used 'assert', which is stripped under
    # 'python -O' and surfaces as a server error; report HTTP 400 instead.

    # ensure unique task names
    task_names = [t.name for t in custom_pipeline.dag.tasks]
    duplicate_task_names = [name for name, count in Counter(task_names).items() if count > 1]
    if duplicate_task_names:
        return f"duplicate task names: {duplicate_task_names}", 400

    # validate pipeline dependencies: every dependency must name an existing task
    pipeline_tasks_by_name: typing.Dict[str, ApiPipelineTask] = {t.name: t for t in custom_pipeline.dag.tasks}
    for t in pipeline_tasks_by_name.values():
        for required_task_name in t.dependencies or []:
            if required_task_name not in pipeline_tasks_by_name:
                return f"missing task '{required_task_name}', as dependency for task '{t.name}'", 400

    # validate input parameters: any pipeline input without a default or a
    # value must be supplied via run_parameters
    missing_run_parameters = {p.name for p in custom_pipeline.inputs.parameters
                              if p.default is None and p.value is None} - run_parameters.keys()
    if missing_run_parameters:
        return f"missing parameters to run pipeline: {missing_run_parameters}", 400

    # make sure we enable anonymous read access to pipeline task components
    for artifact_type in {t.artifact_type for t in pipeline_tasks_by_name.values()}:
        enable_anonymous_read_access(bucket_name="mlpipeline", prefix=f"{artifact_type}s/*")

    try:
        run_id = run_custom_pipeline_in_experiment(custom_pipeline, run_name, run_parameters)
        return ApiRunCodeResponse(run_url=f"/runs/details/{run_id}"), 200

    except Exception as e:
        # TODO: remove traceback?
        import traceback
        print(traceback.format_exc())
        return f"Error while trying to run custom pipeline '{run_name}': {e}", 500
def run_model(id,
              pipeline_stage,
              execution_platform,
              run_name=None,
              parameters: dict = None):  # noqa: E501
    """run_model

    Start a Kubeflow Pipelines run for the given model at the requested
    pipeline stage on the requested execution platform.

    :param id: model identifier
    :type id: str
    :param pipeline_stage: pipeline stage, either 'train' or 'serve'
    :type pipeline_stage: str
    :param execution_platform: execution platform, i.e. 'kubernetes', 'knative'
    :type execution_platform: str
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to model identifier
    :type run_name: str
    :param parameters: optional run parameters, must include 'github_token' and 'github_url' if credentials are required
    :type parameters: dict

    :rtype: ApiRunCodeResponse
    """
    # bail out early when no Kubeflow Pipelines host is configured
    if KFP_HOST == "UNAVAILABLE":
        return f"Kubeflow Pipeline host is 'UNAVAILABLE'", 503

    api_model, status_code = get_model(id)
    if status_code > 200:
        return f"Model with id '{id}' does not exist", 404

    # stage/platform/parameter combination is checked before starting the run
    parameter_errors, status_code = _validate_run_parameters(
        api_model, pipeline_stage, execution_platform, parameters)
    if parameter_errors:
        return parameter_errors, status_code

    try:
        run_id = run_model_in_experiment(api_model, pipeline_stage,
                                         execution_platform, run_name,
                                         parameters)
        return ApiRunCodeResponse(run_url=f"/runs/details/{run_id}"), 200
    except Exception as e:
        return f"Error while trying to run model with id '{id}': {e}", 500
def run_pipeline(id, run_name=None, parameters=None):  # noqa: E501
    """run_pipeline

    Start a Kubeflow Pipelines run for the pipeline with the given id.

    :param id: pipeline id
    :type id: str
    :param run_name: name to identify the run on the Kubeflow Pipelines UI, defaults to pipeline name
    :type run_name: str
    :param parameters: optional run parameters, may be required based on pipeline definition
    :type parameters: dict

    :rtype: ApiRunCodeResponse
    """
    # no point continuing when the KFP host was never configured
    if KFP_HOST == "UNAVAILABLE":
        return f"Kubeflow Pipeline host is 'UNAVAILABLE'", 503

    # explicit 'parameters' win; otherwise fall back to the JSON request body
    if parameters or not connexion.request.is_json:
        parameter_dict = parameters
    else:
        parameter_dict = dict(connexion.request.get_json())  # noqa: E501

    api_pipeline, status_code = get_pipeline(id)
    if status_code > 200:
        return f"Pipeline with id '{id}' does not exist", 404

    parameter_errors, status_code = _validate_parameters(api_pipeline, parameter_dict)
    if parameter_errors:
        return parameter_errors, status_code

    try:
        run_id = run_pipeline_in_experiment(api_pipeline, parameter_dict, run_name)
        return ApiRunCodeResponse(run_url=f"/runs/details/{run_id}"), 200
    except Exception as e:
        return f"Error while trying to run pipeline {id}: {e}", 500