Example #1
def test_pipeline_with_comments():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_3_node_sample_with_comments.json")
    pipeline = PipelineParser().parse(pipeline_json)
    assert (
        pipeline.operations["d52ddfb4-dd0e-47ac-abc7-fa30bb95d45c"].doc
        == "Generate community stats and then aggregate them on an overview dashboard"
    )
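The helper _read_pipeline_resource is used throughout these examples but never shown. A minimal sketch, assuming it resolves the given path relative to the test module and returns the parsed JSON as a dict:

import json
import os


def _read_pipeline_resource(pipeline_filename):
    # Assumption: the sample resources live alongside the test module
    root = os.path.dirname(__file__)
    with open(os.path.join(root, pipeline_filename)) as f:
        return json.load(f)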
Example #2
def test_missing_pipeline_name_should_default_to_untitled():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["app_data"]["properties"].pop("name")

    pipeline = PipelineParser().parse(pipeline_json)

    assert pipeline.name == "untitled"
Example #3
def test_supernode_pipeline():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_with_supernode.json")

    pipeline = PipelineParser().parse(pipeline_json)

    assert len(pipeline.operations) == 4

    # Confirm the structure of the pipeline:
    # Two execution nodes feed their outputs into a supernode containing a single
    # execution node, which in turn sends its output to an external execution node.
    # 4 nodes total. The supernode's execution node should have two parent
    # operations pointing at the first two nodes, and the final node should have
    # one parent pointing at the execution node WITHIN the supernode.
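    #
    # Topology sketch (added for clarity):
    #   [input node A] ──┐
    #                    ├──> [supernode execution node] ──> [external node]
    #   [input node B] ──┘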

    external_input_node_ids = ["db9f3f5b-b2e3-4824-aadd-c1c6bf652534", "f6584209-6f22-434f-9820-41327b6c749d"]
    supernode_execution_node_id = "079c0e12-eb5f-4fcc-983b-09e011869fee"
    external_node_id = "7628306d-2cc2-405c-94a1-fe42c95567a1"

    for node_id in pipeline.operations:
        # Validate operations list
        if node_id in external_input_node_ids:
            # These are input nodes, ensure parent_operation_ids are empty
            assert len(pipeline.operations[node_id].parent_operation_ids) == 0
            continue
        if node_id == supernode_execution_node_id:
            # The node within the supernode should have two parent ops matching external_input_node_ids
            assert len(pipeline.operations[node_id].parent_operation_ids) == 2
            assert set(pipeline.operations[node_id].parent_operation_ids) == set(external_input_node_ids)
            continue
        if node_id == external_node_id:
            # The final external node should have the supernode's embedded node as its parent op.
            assert len(pipeline.operations[node_id].parent_operation_ids) == 1
            assert pipeline.operations[node_id].parent_operation_ids[0] == supernode_execution_node_id
            continue
        assert False, "Invalid node_id encountered in pipeline operations!"
Example #4
def test_pipeline_with_dependencies():
    pipeline_json = _read_pipeline_resource(
        "resources/sample_pipelines/" "pipeline_3_node_sample_with_dependencies.json"
    )

    pipeline = PipelineParser().parse(pipeline_json)

    assert len(pipeline.operations["acc4527d-7cc8-4c16-b520-5aa0f50a2e34"].parent_operation_ids) == 2
Example #5
def test_missing_operation_id():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["nodes"][0].pop("id")

    with pytest.raises(ValueError) as e:
        PipelineParser().parse(pipeline_json)

    assert "Missing field 'operation id'" in str(e.value)
Example #6
def test_missing_operation_type():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["nodes"][0].pop("type")

    with pytest.raises(ValueError) as e:
        PipelineParser().parse(pipeline_json)

    assert "Node type 'None' is invalid!" in str(e.value)
Example #7
def test_missing_operation_image():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["nodes"][0]["app_data"]["component_parameters"].pop("runtime_image")

    with pytest.raises(ValueError) as e:
        PipelineParser().parse(pipeline_json)

    assert "Missing field 'operation runtime image'" in str(e.value)
Example #8
def test_missing_pipeline_runtime_configuration():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["app_data"].pop("runtime_config")

    with pytest.raises(ValueError) as e:
        PipelineParser().parse(pipeline_json)

    assert "Invalid pipeline: Missing runtime configuration" in str(e.value)
Example #9
def test_invalid_node_type():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")
    pipeline_json["pipelines"][0]["nodes"][0]["type"] = "foo"

    with pytest.raises(ValueError) as e:
        PipelineParser().parse(pipeline_json)

    assert "Node type 'foo' is invalid!" in str(e.value)
Example #10
def test_pipeline_global_attributes():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_valid.json")

    pipeline = PipelineParser().parse(pipeline_json)

    assert pipeline.name == "{{name}}"
    assert pipeline.runtime == "{{runtime}}"
    assert pipeline.runtime_config == "{{runtime-config}}"
Example #11
def test_pipeline_operations_and_handle_artifact_file_details():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_3_node_sample.json")

    pipeline = PipelineParser().parse(pipeline_json)

    assert len(pipeline.operations) == 3

    for op in pipeline.operations.values():
        assert "." not in op.name
Example #12
def test_pipeline_with_dirty_list_values(valid_operation):
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_with_invalid_list_values.json")

    pipeline = PipelineParser().parse(pipeline_json)

    assert pipeline.name == "{{name}}"
    assert pipeline.runtime == "{{runtime}}"
    assert pipeline.runtime_config == "{{runtime-config}}"
    assert len(pipeline.operations) == 1
    assert pipeline.operations["{{uuid}}"] == valid_operation
Example #13
def test_pipeline_get_envs():
    # Ensure pipeline operation env lists are properly converted to dictionaries.
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_dependency_complex.json")

    pipeline = PipelineParser().parse(pipeline_json)

    for op in pipeline.operations.values():
        assert isinstance(op, GenericOperation)
        op_envs = op.env_vars.to_dict()
        assert op_envs["OP_NAME"] == op.name
Example #14
    async def post(self, *args, **kwargs):
        self.log.debug("Pipeline Export handler now executing post request")

        parent = self.settings.get("elyra")
        payload = self.get_json_body()

        self.log.debug(
            f"JSON payload: {json.dumps(payload, indent=2, separators=(',', ': '))}"
        )

        pipeline_definition = payload["pipeline"]
        pipeline_export_format = payload["export_format"]
        pipeline_export_path = payload["export_path"]
        pipeline_overwrite = payload["overwrite"]

        response = await PipelineValidationManager.instance().validate(pipeline=pipeline_definition)
        self.log.debug(
            f"Validation checks completed. Results as follows: {response.to_json()}"
        )

        if not response.has_fatal:
            pipeline = PipelineParser(root_dir=self.settings["server_root_dir"], parent=parent).parse(
                pipeline_definition
            )

            pipeline_exported_path = await PipelineProcessorManager.instance().export(
                pipeline, pipeline_export_format, pipeline_export_path, pipeline_overwrite
            )
            json_msg = json.dumps({"export_path": pipeline_export_path})
            self.set_status(201)
            self.set_header("Content-Type", "application/json")
            location = url_path_join(self.base_url, "api", "contents", pipeline_exported_path)
            self.set_header("Location", location)
        else:
            json_msg = json.dumps(
                {
                    "reason": responses.get(400),
                    "message": "Errors found in pipeline",
                    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "issues": response.to_json().get("issues"),
                }
            )
            self.set_status(400)

        self.set_header("Content-Type", "application/json")
        await self.finish(json_msg)
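The JSON body this handler expects can be read off the four payload[...] accesses above; a hypothetical client payload:

# Hypothetical payload for the export handler; keys mirror the reads above.
payload = {
    "pipeline": pipeline_definition,    # the pipeline JSON document to export
    "export_format": "yaml",            # assumption: a format the runtime supports
    "export_path": "my_pipeline.yaml",  # destination for the exported file
    "overwrite": True,                  # replace an existing file at export_path
}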
Example #15
def _execute_pipeline(pipeline_definition) -> PipelineProcessorResponse:
    try:
        # parse pipeline
        pipeline_object = PipelineParser().parse(pipeline_definition)
        # process pipeline
        with warnings.catch_warnings():
            warnings.simplefilter("ignore")
            response = asyncio.get_event_loop().run_until_complete(
                PipelineProcessorManager.instance().process(pipeline_object)
            )
            return response
    except ValueError as ve:
        raise click.ClickException(f"Error parsing pipeline: \n {ve}")
    except RuntimeError as re:
        raise click.ClickException(f"Error processing pipeline: \n {re} \n {re.__cause__}")
Example #16
def test_pipeline_execution_order_in_simple_pipeline():
    expected_operation_names = ["f", "a", "c", "g"]
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_dependency_simple.json")

    pipeline = PipelineParser().parse(pipeline_json)
    current_ordered_operation_names = _get_operation_names(pipeline.operations.values())
    # The parsed (unsorted) order should differ from the expected execution order
    assert current_ordered_operation_names != expected_operation_names

    operations = LocalPipelineProcessor._sort_operations(operations_by_id=pipeline.operations)

    ordered_operation_names = _get_operation_names(operations)

    assert ordered_operation_names == expected_operation_names
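The _get_operation_names helper is not shown; a minimal sketch, assuming it simply collects each operation's name in iteration order:

def _get_operation_names(operations):
    # Assumption: operations expose a 'name' attribute, as used in the tests above
    return [operation.name for operation in operations]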
Example #17
    async def post(self, *args, **kwargs):
        self.log.debug("Pipeline SchedulerHandler now executing post request")

        parent = self.settings.get("elyra")
        pipeline_definition = self.get_json_body()
        self.log.debug(f"JSON payload: {pipeline_definition}")

        response = await PipelineValidationManager.instance().validate(pipeline=pipeline_definition)

        self.log.debug(
            f"Validation checks completed. Results as follows: {response.to_json()}"
        )

        if not response.has_fatal:
            self.log.debug("Processing the pipeline submission and executing request")
            pipeline = PipelineParser(root_dir=self.settings["server_root_dir"], parent=parent).parse(
                pipeline_definition
            )
            response = await PipelineProcessorManager.instance().process(pipeline)
            json_msg = json.dumps(response.to_json())
            self.set_status(200)
        else:
            json_msg = json.dumps(
                {
                    "reason": responses.get(400),
                    "message": "Errors found in pipeline",
                    "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
                    "issues": response.to_json().get("issues"),
                }
            )
            self.set_status(400)

        self.set_header("Content-Type", "application/json")
        await self.finish(json_msg)
Example #18
def export(pipeline_path, runtime_config, output, overwrite):
    """
    Export a pipeline to a runtime-specific format
    """

    click.echo()
    print_banner("Elyra pipeline export")

    rtc = _get_runtime_config(runtime_config)
    runtime_schema = rtc.schema_name
    runtime_type = rtc.metadata.get("runtime_type")

    pipeline_definition = _preprocess_pipeline(pipeline_path, runtime=runtime_schema, runtime_config=runtime_config)

    # Verify that the pipeline's runtime type is compatible with the
    # runtime configuration
    pipeline_runtime_type = _get_pipeline_runtime_type(pipeline_definition)
    if pipeline_runtime_type and pipeline_runtime_type != "Generic" and pipeline_runtime_type != runtime_type:
        raise click.BadParameter(
            f"The runtime configuration type '{runtime_type}' does not match "
            f"the pipeline's runtime type '{pipeline_runtime_type}'.",
            param_hint="--runtime-config",
        )

    resources = RuntimeTypeResources.get_instance_by_type(RuntimeProcessorType.get_instance_by_name(runtime_type))
    supported_export_formats = resources.get_export_extensions()
    if len(supported_export_formats) == 0:
        raise click.ClickException(f"Runtime type '{runtime_type}' does not support export.")

    # If, in the future, a runtime supports multiple export output formats,
    # the user can choose one. For now, choose the only option.
    selected_export_format = supported_export_formats[0]
    selected_export_format_suffix = f".{selected_export_format}"

    # generate output file name from the user-provided input
    if output is None:
        # user did not specify an output; use current directory
        # and derive the file name from the pipeline file name
        output_path = Path.cwd()
        filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    else:
        if output.suffix == selected_export_format_suffix:
            # user provided a file name
            output_path = output.parent
            filename = output.name
        else:
            # user provided a directory
            output_path = output
            filename = f"{Path(pipeline_path).stem}{selected_export_format_suffix}"
    output_file = output_path.resolve() / filename

    # verify that the output path meets the prerequisites
    if not output_file.parent.is_dir():
        try:
            output_file.parent.mkdir(parents=True, exist_ok=True)
        except Exception as ex:
            raise click.BadParameter(f"Cannot create output directory: {ex}", param_hint="--output")

    # handle output overwrite
    if output_file.exists() and not overwrite:
        raise click.ClickException(
            f"Output file '{str(output_file)}' exists and " "option '--overwrite' was not specified."
        )

    if pipeline_runtime_type:
        _build_component_cache()

    # validate the pipeline
    try:
        _validate_pipeline_definition(pipeline_definition)
    except Exception:
        raise click.ClickException("Pipeline validation FAILED. The pipeline was not exported.")

    with Spinner(text="Exporting pipeline ..."):
        try:
            # parse pipeline
            pipeline_object = PipelineParser().parse(pipeline_definition)
            # process pipeline
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                asyncio.get_event_loop().run_until_complete(
                    PipelineProcessorManager.instance().export(
                        pipeline_object, selected_export_format, str(output_file), True
                    )
                )
        except ValueError as ve:
            raise click.ClickException(f"Error parsing pipeline: \n {ve}")
        except RuntimeError as re:
            raise click.ClickException(f"Error exporting pipeline: \n {re} \n {re.__cause__}")

    click.echo(f"Pipeline was exported to '{str(output_file)}'.")
Example #19
def test_multinode_pipeline():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/pipeline_3_node_sample.json")

    pipeline = PipelineParser().parse(pipeline_json)

    assert len(pipeline.operations) == 3
Example #20
def test_multiple_pipeline_definition():
    pipeline_json = _read_pipeline_resource("resources/sample_pipelines/" "pipeline_multiple_pipeline_definitions.json")

    with pytest.raises(ValueError):
        PipelineParser().parse(pipeline_json)
Example #21
@pytest.fixture
def parsed_pipeline():
    pipeline_resource = _read_pipeline_resource(PIPELINE_FILE)
    return PipelineParser().parse(pipeline_definitions=pipeline_resource)
Example #22
@pytest.fixture
def parsed_pipeline(request):
    pipeline_resource = _read_pipeline_resource(request.param)
    return PipelineParser().parse(pipeline_json=pipeline_resource)
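Example #22 is an indirectly parametrized fixture: request.param carries the resource path supplied by the test. A minimal sketch of the wiring, assuming pytest's indirect parametrization (the test name is hypothetical):

import pytest


@pytest.mark.parametrize(
    "parsed_pipeline",
    ["resources/sample_pipelines/pipeline_valid.json"],
    indirect=True,  # route the value into request.param inside the fixture
)
def test_parsed_pipeline_has_operations(parsed_pipeline):
    assert len(parsed_pipeline.operations) > 0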